[Box Backup-commit] COMMIT r2963 - in box/trunk: . lib/backupclient lib/backupstore

subversion at boxbackup.org subversion at boxbackup.org
Tue May 24 16:08:59 BST 2011


Author: chris
Date: 2011-05-24 16:08:59 +0100 (Tue, 24 May 2011)
New Revision: 2963

Added:
   box/trunk/lib/backupstore/BackupStoreFileCmbDiff.cpp
   box/trunk/lib/backupstore/BackupStoreFileCmbIdx.cpp
   box/trunk/lib/backupstore/BackupStoreFileCombine.cpp
   box/trunk/lib/backupstore/BackupStoreFileDiff.cpp
Removed:
   box/trunk/lib/backupclient/BackupStoreFileCmbDiff.cpp
   box/trunk/lib/backupclient/BackupStoreFileCmbIdx.cpp
   box/trunk/lib/backupclient/BackupStoreFileCombine.cpp
   box/trunk/lib/backupclient/BackupStoreFileDiff.cpp
Modified:
   box/trunk/modules.txt
Log:
Move remaining parts of BackupStoreFile into lib/backupstore, and fix module
dependencies to fail if anything else required by bbstored is still in
lib/backupclient instead of lib/backupstore.


Deleted: box/trunk/lib/backupclient/BackupStoreFileCmbDiff.cpp
===================================================================
--- box/trunk/lib/backupclient/BackupStoreFileCmbDiff.cpp	2011-05-24 13:33:50 UTC (rev 2962)
+++ box/trunk/lib/backupclient/BackupStoreFileCmbDiff.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -1,326 +0,0 @@
-// --------------------------------------------------------------------------
-//
-// File
-//		Name:    BackupStoreFileCmbDiff.cpp
-//		Purpose: Combine two diffs together
-//		Created: 12/7/04
-//
-// --------------------------------------------------------------------------
-
-#include "Box.h"
-
-#include <new>
-#include <stdlib.h>
-
-#include "BackupStoreFile.h"
-#include "BackupStoreFileWire.h"
-#include "BackupStoreObjectMagic.h"
-#include "BackupStoreException.h"
-#include "BackupStoreConstants.h"
-#include "BackupStoreFilename.h"
-
-#include "MemLeakFindOn.h"
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BackupStoreFile::CombineDiffs(IOStream &, IOStream &, IOStream &rOut)
-//		Purpose: Given two diffs, combine them into a single diff, to produce a diff
-//				 which, combined with the original file, creates the result of applying
-//				 rDiff, then rDiff2. Two opens of rDiff2 are required
-//		Created: 12/7/04
-//
-// --------------------------------------------------------------------------
-void BackupStoreFile::CombineDiffs(IOStream &rDiff1, IOStream &rDiff2, IOStream &rDiff2b, IOStream &rOut)
-{
-	// Skip header of first diff, record where the data starts, and skip to the index
-	int64_t diff1DataStarts = 0;
-	{
-		// Read the header for the From file
-		file_StreamFormat diff1Hdr;
-		if(!rDiff1.ReadFullBuffer(&diff1Hdr, sizeof(diff1Hdr), 0))
-		{
-			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-		}
-		if(ntohl(diff1Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
-		{
-			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-		}
-		// Skip over the filename and attributes of the From file
-		// BLOCK
-		{
-			BackupStoreFilename filename2;
-			filename2.ReadFromStream(rDiff1, IOStream::TimeOutInfinite);
-			int32_t size_s;
-			if(!rDiff1.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */))
-			{
-				THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead)
-			}
-			int size = ntohl(size_s);
-			// Skip forward the size
-			rDiff1.Seek(size, IOStream::SeekType_Relative);		
-		}
-		// Record position
-		diff1DataStarts = rDiff1.GetPosition();
-		// Skip to index
-		rDiff1.Seek(0 - (((box_ntoh64(diff1Hdr.mNumBlocks)) * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
-	}
-
-	// Read the index of the first diff
-	// Header first
-	file_BlockIndexHeader diff1IdxHdr;
-	if(!rDiff1.ReadFullBuffer(&diff1IdxHdr, sizeof(diff1IdxHdr), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-	}
-	if(ntohl(diff1IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	int64_t diff1NumBlocks = box_ntoh64(diff1IdxHdr.mNumBlocks);
-	// Allocate some memory
-	int64_t *diff1BlockStartPositions = (int64_t*)::malloc((diff1NumBlocks + 1) * sizeof(int64_t));
-	if(diff1BlockStartPositions == 0)
-	{
-		throw std::bad_alloc();
-	}
-
-	// Buffer data
-	void *buffer = 0;
-	int bufferSize = 0;	
-	
-	try
-	{
-		// Then the entries:
-		// For each entry, want to know if it's in the file, and if so, how big it is.
-		// We'll store this as an array of file positions in the file, with an additioal
-		// entry on the end so that we can work out the length of the last block.
-		// If an entry isn't in the file, then store 0 - (position in other file).
-		int64_t diff1Position = diff1DataStarts;
-		for(int64_t b = 0; b < diff1NumBlocks; ++b)
-		{
-			file_BlockIndexEntry e;
-			if(!rDiff1.ReadFullBuffer(&e, sizeof(e), 0))
-			{
-				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-			}
-	
-			// Where's the block?
-			int64_t blockEn = box_ntoh64(e.mEncodedSize);
-			if(blockEn <= 0)
-			{
-				// Just store the negated block number
-				diff1BlockStartPositions[b] = blockEn;
-			}
-			else
-			{
-				// Block is present in this file
-				diff1BlockStartPositions[b] = diff1Position;
-				diff1Position += blockEn;
-			}
-		}
-		
-		// Finish off the list, so the last entry can have it's size calcuated.
-		diff1BlockStartPositions[diff1NumBlocks] = diff1Position;
-
-		// Now read the second diff's header, copying it to the out file
-		file_StreamFormat diff2Hdr;
-		if(!rDiff2.ReadFullBuffer(&diff2Hdr, sizeof(diff2Hdr), 0))
-		{
-			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-		}
-		if(ntohl(diff2Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
-		{
-			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-		}
-		// Copy
-		rOut.Write(&diff2Hdr, sizeof(diff2Hdr));
-		// Copy over filename and attributes
-		// BLOCK
-		{
-			BackupStoreFilename filename;
-			filename.ReadFromStream(rDiff2, IOStream::TimeOutInfinite);
-			filename.WriteToStream(rOut);
-			StreamableMemBlock attr;
-			attr.ReadFromStream(rDiff2, IOStream::TimeOutInfinite);
-			attr.WriteToStream(rOut);
-		}
-		
-		// Get to the index of rDiff2b, and read the header
-		MoveStreamPositionToBlockIndex(rDiff2b);
-		file_BlockIndexHeader diff2IdxHdr;
-		if(!rDiff2b.ReadFullBuffer(&diff2IdxHdr, sizeof(diff2IdxHdr), 0))
-		{
-			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-		}
-		if(ntohl(diff2IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
-		{
-			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-		}
-		int64_t diff2NumBlocks = box_ntoh64(diff2IdxHdr.mNumBlocks);
-		int64_t diff2IndexEntriesStart = rDiff2b.GetPosition();
-		
-		// Then read all the entries
-		int64_t diff2FilePosition = rDiff2.GetPosition();
-		for(int64_t b = 0; b < diff2NumBlocks; ++b)
-		{
-			file_BlockIndexEntry e;
-			if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0))
-			{
-				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-			}
-
-			// What do to next about copying data
-			bool copyBlock = false;
-			int copySize = 0;
-			int64_t copyFrom = 0;
-			bool fromFileDiff1 = false;
-	
-			// Where's the block?
-			int64_t blockEn = box_ntoh64(e.mEncodedSize);
-			if(blockEn > 0)
-			{
-				// Block is present in this file -- copy to out
-				copyBlock = true;
-				copyFrom = diff2FilePosition;
-				copySize = (int)blockEn;
-				
-				// Move pointer onwards
-				diff2FilePosition += blockEn;
-			}
-			else
-			{
-				// Block isn't present here -- is it present in the old one?
-				int64_t blockIndex = 0 - blockEn;
-				if(blockIndex < 0 || blockIndex > diff1NumBlocks)
-				{
-					THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-				}
-				if(diff1BlockStartPositions[blockIndex] > 0)
-				{
-					// Block is in the old diff file, copy it across
-					copyBlock = true;
-					copyFrom = diff1BlockStartPositions[blockIndex];
-					int nb = blockIndex + 1;
-					while(diff1BlockStartPositions[nb] <= 0)
-					{
-						// This is safe, because the last entry will terminate it properly!
-						++nb;
-						ASSERT(nb <= diff1NumBlocks);
-					}
-					copySize = diff1BlockStartPositions[nb] - copyFrom;
-					fromFileDiff1 = true;
-				}
-			}
-			//TRACE4("%d %d %lld %d\n", copyBlock, copySize, copyFrom, fromFileDiff1);
-						
-			// Copy data to the output file?
-			if(copyBlock)
-			{
-				// Allocate enough space
-				if(bufferSize < copySize || buffer == 0)
-				{
-					// Free old block
-					if(buffer != 0)
-					{
-						::free(buffer);
-						buffer = 0;
-						bufferSize = 0;
-					}
-					// Allocate new block
-					buffer = ::malloc(copySize);
-					if(buffer == 0)
-					{
-						throw std::bad_alloc();
-					}
-					bufferSize = copySize;
-				}
-				ASSERT(bufferSize >= copySize);
-				
-				// Load in the data
-				if(fromFileDiff1)
-				{
-					rDiff1.Seek(copyFrom, IOStream::SeekType_Absolute);
-					if(!rDiff1.ReadFullBuffer(buffer, copySize, 0))
-					{
-						THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-					}
-				}
-				else
-				{
-					rDiff2.Seek(copyFrom, IOStream::SeekType_Absolute);
-					if(!rDiff2.ReadFullBuffer(buffer, copySize, 0))
-					{
-						THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-					}
-				}
-				// Write out data
-				rOut.Write(buffer, copySize);
-			}
-		}
-		
-		// Write the modified header
-		diff2IdxHdr.mOtherFileID = diff1IdxHdr.mOtherFileID;
-		rOut.Write(&diff2IdxHdr, sizeof(diff2IdxHdr));
-		
-		// Then we'll write out the index, reading the data again
-		rDiff2b.Seek(diff2IndexEntriesStart, IOStream::SeekType_Absolute);
-		for(int64_t b = 0; b < diff2NumBlocks; ++b)
-		{
-			file_BlockIndexEntry e;
-			if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0))
-			{
-				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-			}
-	
-			// Where's the block?
-			int64_t blockEn = box_ntoh64(e.mEncodedSize);
-		
-			// If it's not in this file, it needs modification...
-			if(blockEn <= 0)
-			{
-				int64_t blockIndex = 0 - blockEn;
-				// In another file. Need to translate this against the other diff
-				if(diff1BlockStartPositions[blockIndex] > 0)
-				{
-					// Block is in the first diff file, stick in size
-					int nb = blockIndex + 1;
-					while(diff1BlockStartPositions[nb] <= 0)
-					{
-						// This is safe, because the last entry will terminate it properly!
-						++nb;
-						ASSERT(nb <= diff1NumBlocks);
-					}
-					int64_t size = diff1BlockStartPositions[nb] - diff1BlockStartPositions[blockIndex];
-					e.mEncodedSize = box_hton64(size);
-				}
-				else
-				{
-					// Block in the original file, use translated value
-					e.mEncodedSize = box_hton64(diff1BlockStartPositions[blockIndex]);
-				}
-			}
-			
-			// Write entry
-			rOut.Write(&e, sizeof(e));
-		}
-	}
-	catch(...)
-	{
-		// clean up
-		::free(diff1BlockStartPositions);
-		if(buffer != 0)
-		{
-			::free(buffer);
-		}
-		throw;
-	}
-	
-	// Clean up allocated memory
-	::free(diff1BlockStartPositions);
-	if(buffer != 0)
-	{
-		::free(buffer);
-	}
-}
-

Deleted: box/trunk/lib/backupclient/BackupStoreFileCmbIdx.cpp
===================================================================
--- box/trunk/lib/backupclient/BackupStoreFileCmbIdx.cpp	2011-05-24 13:33:50 UTC (rev 2962)
+++ box/trunk/lib/backupclient/BackupStoreFileCmbIdx.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -1,324 +0,0 @@
-// --------------------------------------------------------------------------
-//
-// File
-//		Name:    BackupStoreFileCmbIdx.cpp
-//		Purpose: Combine indicies of a delta file and the file it's a diff from.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-
-#include "Box.h"
-
-#include <new>
-#include <string.h>
-
-#include "BackupStoreFile.h"
-#include "BackupStoreFileWire.h"
-#include "BackupStoreObjectMagic.h"
-#include "BackupStoreException.h"
-#include "BackupStoreConstants.h"
-#include "BackupStoreFilename.h"
-
-#include "MemLeakFindOn.h"
-
-// Hide from outside world
-namespace
-{
-
-class BSFCombinedIndexStream : public IOStream
-{
-public:
-	BSFCombinedIndexStream(IOStream *pDiff);
-	~BSFCombinedIndexStream();
-	
-	virtual int Read(void *pBuffer, int NBytes, int Timeout = IOStream::TimeOutInfinite);
-	virtual void Write(const void *pBuffer, int NBytes);
-	virtual bool StreamDataLeft();
-	virtual bool StreamClosed();
-	virtual void Initialise(IOStream &rFrom);
-	
-private:
-	IOStream *mpDiff;
-	bool mIsInitialised;
-	bool mHeaderWritten;
-	file_BlockIndexHeader mHeader;
-	int64_t mNumEntriesToGo;
-	int64_t mNumEntriesInFromFile;
-	int64_t *mFromBlockSizes;		// NOTE: Entries in network byte order
-};
-
-};
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BackupStoreFile::CombineFileIndices(IOStream &, IOStream &, bool)
-//		Purpose: Given a diff file and the file it's a diff from, return a stream from which
-//				 can be read the index of the combined file, without actually combining them.
-//				 The stream of the diff must have a lifetime greater than or equal to the
-//				 lifetime of the returned stream object. The full "from" file stream
-//				 only needs to exist during the actual function call.
-//				 If you pass in dodgy files which aren't related, then you will either
-//				 get an error or bad results. So don't do that.
-//				 If DiffIsIndexOnly is true, then rDiff is assumed to be a stream positioned
-//				 at the beginning of the block index. Similarly for FromIsIndexOnly.
-//				 WARNING: Reads of the returned streams with buffer sizes less than 64 bytes
-//				 will not return any data.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-std::auto_ptr<IOStream> BackupStoreFile::CombineFileIndices(IOStream &rDiff, IOStream &rFrom, bool DiffIsIndexOnly, bool FromIsIndexOnly)
-{
-	// Reposition file pointers?
-	if(!DiffIsIndexOnly)
-	{
-		MoveStreamPositionToBlockIndex(rDiff);
-	}
-	if(!FromIsIndexOnly)
-	{
-		MoveStreamPositionToBlockIndex(rFrom);
-	}
-
-	// Create object
-	std::auto_ptr<IOStream> stream(new BSFCombinedIndexStream(&rDiff));
-
-	// Initialise it
-	((BSFCombinedIndexStream *)stream.get())->Initialise(rFrom);
-
-	// And return the stream
-	return stream;
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BSFCombinedIndexStream::BSFCombinedIndexStream()
-//		Purpose: Private class. Constructor.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-BSFCombinedIndexStream::BSFCombinedIndexStream(IOStream *pDiff)
-	: mpDiff(pDiff),
-	  mIsInitialised(false),
-	  mHeaderWritten(false),
-	  mNumEntriesToGo(0),
-	  mNumEntriesInFromFile(0),
-	  mFromBlockSizes(0)
-{
-	ASSERT(mpDiff != 0);
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BSFCombinedIndexStream::~BSFCombinedIndexStream()
-//		Purpose: Private class. Destructor.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-BSFCombinedIndexStream::~BSFCombinedIndexStream()
-{
-	if(mFromBlockSizes != 0)
-	{
-		::free(mFromBlockSizes);
-		mFromBlockSizes = 0;
-	}
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BSFCombinedIndexStream::Initialise(IOStream &)
-//		Purpose: Private class. Initalise from the streams (diff passed in constructor).
-//				 Both streams must have file pointer positioned at the block index.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-void BSFCombinedIndexStream::Initialise(IOStream &rFrom)
-{
-	// Paranoia is good.
-	if(mIsInitialised)
-	{
-		THROW_EXCEPTION(BackupStoreException, Internal)
-	}
-	
-	// Look at the diff file: Read in the header
-	if(!mpDiff->ReadFullBuffer(&mHeader, sizeof(mHeader), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-	}
-	if(ntohl(mHeader.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// Read relevant data.
-	mNumEntriesToGo = box_ntoh64(mHeader.mNumBlocks);
-	
-	// Adjust a bit to reflect the fact it's no longer a diff
-	mHeader.mOtherFileID = box_hton64(0);
-	
-	// Now look at the from file: Read header
-	file_BlockIndexHeader fromHdr;
-	if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-	}
-	if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// Then... allocate memory for the list of sizes
-	mNumEntriesInFromFile = box_ntoh64(fromHdr.mNumBlocks);
-	mFromBlockSizes = (int64_t*)::malloc(mNumEntriesInFromFile * sizeof(int64_t));
-	if(mFromBlockSizes == 0)
-	{
-		throw std::bad_alloc();
-	}
-	
-	// And read them all in!
-	for(int64_t b = 0; b < mNumEntriesInFromFile; ++b)
-	{
-		file_BlockIndexEntry e;
-		if(!rFrom.ReadFullBuffer(&e, sizeof(e), 0))
-		{
-			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-		}
-		
-		// Check that the from file isn't a delta in itself
-		if(box_ntoh64(e.mEncodedSize) <= 0)
-		{
-			THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
-		}
-
-		// Store size (in network byte order)
-		mFromBlockSizes[b] = e.mEncodedSize;
-	}
-	
-	// Flag as initialised
-	mIsInitialised = true;
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BSFCombinedIndexStream::Read(void *, int, int)
-//		Purpose: Private class. As interface.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-int BSFCombinedIndexStream::Read(void *pBuffer, int NBytes, int Timeout)
-{
-	// Paranoia is good.
-	if(!mIsInitialised || mFromBlockSizes == 0 || mpDiff == 0)
-	{
-		THROW_EXCEPTION(BackupStoreException, Internal)
-	}
-	
-	int written = 0;
-	
-	// Header output yet?
-	if(!mHeaderWritten)
-	{
-		// Enough space?
-		if(NBytes < (int)sizeof(mHeader)) return 0;
-		
-		// Copy in
-		::memcpy(pBuffer, &mHeader, sizeof(mHeader));
-		NBytes -= sizeof(mHeader);
-		written += sizeof(mHeader);
-	
-		// Flag it's done
-		mHeaderWritten = true;
-	}
-
-	// How many entries can be written?
-	int entriesToWrite = NBytes / sizeof(file_BlockIndexEntry);
-	if(entriesToWrite > mNumEntriesToGo)
-	{
-		entriesToWrite = mNumEntriesToGo;
-	}
-	
-	// Setup ready to go
-	file_BlockIndexEntry *poutput = (file_BlockIndexEntry*)(((uint8_t*)pBuffer) + written);
-
-	// Write entries
-	for(int b = 0; b < entriesToWrite; ++b)
-	{
-		if(!mpDiff->ReadFullBuffer(&(poutput[b]), sizeof(file_BlockIndexEntry), 0))
-		{
-			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-		}
-		
-		// Does this need adjusting?
-		int s = box_ntoh64(poutput[b].mEncodedSize);
-		if(s <= 0)
-		{
-			// A reference to a block in the from file
-			int block = 0 - s;
-			ASSERT(block >= 0);
-			if(block >= mNumEntriesInFromFile)
-			{
-				// That's not good, the block doesn't exist
-				THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
-			}
-			
-			// Adjust the entry in the buffer
-			poutput[b].mEncodedSize = mFromBlockSizes[block];	// stored in network byte order, no translation necessary
-		}
-	}
-	
-	// Update written count
-	written += entriesToWrite * sizeof(file_BlockIndexEntry);
-	mNumEntriesToGo -= entriesToWrite;
-	
-	return written;
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BSFCombinedIndexStream::Write(const void *, int)
-//		Purpose: Private class. As interface.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-void BSFCombinedIndexStream::Write(const void *pBuffer, int NBytes)
-{
-	THROW_EXCEPTION(BackupStoreException, StreamDoesntHaveRequiredFeatures)
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BSFCombinedIndexStream::StreamDataLeft()
-//		Purpose: Private class. As interface
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-bool BSFCombinedIndexStream::StreamDataLeft()
-{
-	return (!mHeaderWritten) || (mNumEntriesToGo > 0);
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BSFCombinedIndexStream::StreamClosed()
-//		Purpose: Private class. As interface.
-//		Created: 8/7/04
-//
-// --------------------------------------------------------------------------
-bool BSFCombinedIndexStream::StreamClosed()
-{
-	return true;	// doesn't do writing
-}
-

Deleted: box/trunk/lib/backupclient/BackupStoreFileCombine.cpp
===================================================================
--- box/trunk/lib/backupclient/BackupStoreFileCombine.cpp	2011-05-24 13:33:50 UTC (rev 2962)
+++ box/trunk/lib/backupclient/BackupStoreFileCombine.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -1,410 +0,0 @@
-// --------------------------------------------------------------------------
-//
-// File
-//		Name:    BackupStoreFileCombine.cpp
-//		Purpose: File combining for BackupStoreFile
-//		Created: 16/1/04
-//
-// --------------------------------------------------------------------------
-
-#include "Box.h"
-
-#include <new>
-
-#include "BackupStoreFile.h"
-#include "BackupStoreFileWire.h"
-#include "BackupStoreObjectMagic.h"
-#include "BackupStoreException.h"
-#include "BackupStoreConstants.h"
-#include "BackupStoreFilename.h"
-#include "FileStream.h"
-
-#include "MemLeakFindOn.h"
-
-typedef struct
-{
-	int64_t mFilePosition;
-} FromIndexEntry;
-
-static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries);
-static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks, IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut);
-static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut);
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BackupStoreFile::CombineFile(IOStream &, IOStream &, IOStream &)
-//		Purpose: Where rDiff is a store file which is incomplete as a result of a
-//				 diffing operation, rFrom is the file it is diffed from, and 
-//				 rOut is the stream in which to place the result, the old file
-//				 and new file are combined into a file containing all the data.
-//				 rDiff2 is the same file as rDiff, opened again to get two
-//				 independent streams to the same file.
-//		Created: 16/1/04
-//
-// --------------------------------------------------------------------------
-void BackupStoreFile::CombineFile(IOStream &rDiff, IOStream &rDiff2, IOStream &rFrom, IOStream &rOut)
-{
-	// Read and copy the header.
-	file_StreamFormat hdr;
-	if(!rDiff.ReadFullBuffer(&hdr, sizeof(hdr), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-	}
-	if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	// Copy
-	rOut.Write(&hdr, sizeof(hdr));
-	// Copy over filename and attributes
-	// BLOCK
-	{
-		BackupStoreFilename filename;
-		filename.ReadFromStream(rDiff, IOStream::TimeOutInfinite);
-		filename.WriteToStream(rOut);
-		StreamableMemBlock attr;
-		attr.ReadFromStream(rDiff, IOStream::TimeOutInfinite);
-		attr.WriteToStream(rOut);
-	}
-	
-	// Read the header for the From file
-	file_StreamFormat fromHdr;
-	if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-	}
-	if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	// Skip over the filename and attributes of the From file
-	// BLOCK
-	{
-		BackupStoreFilename filename2;
-		filename2.ReadFromStream(rFrom, IOStream::TimeOutInfinite);
-		int32_t size_s;
-		if(!rFrom.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */))
-		{
-			THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead)
-		}
-		int size = ntohl(size_s);
-		// Skip forward the size
-		rFrom.Seek(size, IOStream::SeekType_Relative);		
-	}
-	
-	// Allocate memory for the block index of the From file
-	int64_t fromNumBlocks = box_ntoh64(fromHdr.mNumBlocks);
-	// NOTE: An extra entry is required so that the length of the last block can be calculated
-	FromIndexEntry *pFromIndex = (FromIndexEntry*)::malloc((fromNumBlocks+1) * sizeof(FromIndexEntry));
-	if(pFromIndex == 0)
-	{
-		throw std::bad_alloc();
-	}
-	
-	try
-	{
-		// Load the index from the From file, calculating the offsets in the
-		// file as we go along, and enforce that everything should be present.
-		LoadFromIndex(rFrom, pFromIndex, fromNumBlocks);
-		
-		// Read in the block index of the Diff file in small chunks, and output data
-		// for each block, either from this file, or the other file.
-		int64_t diffNumBlocks = box_ntoh64(hdr.mNumBlocks);
-		CopyData(rDiff /* positioned at start of data */, rDiff2, diffNumBlocks, rFrom, pFromIndex, fromNumBlocks, rOut);
-		
-		// Read in the block index again, and output the new block index, simply
-		// filling in the sizes of blocks from the old file.
-		WriteNewIndex(rDiff, diffNumBlocks, pFromIndex, fromNumBlocks, rOut);
-		
-		// Free buffers
-		::free(pFromIndex);
-		pFromIndex = 0;
-	}
-	catch(...)
-	{
-		// Clean up
-		if(pFromIndex != 0)
-		{
-			::free(pFromIndex);
-			pFromIndex = 0;
-		}	
-		throw;
-	}
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static LoadFromIndex(IOStream &, FromIndexEntry *, int64_t)
-//		Purpose: Static. Load the index from the From file
-//		Created: 16/1/04
-//
-// --------------------------------------------------------------------------
-static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries)
-{
-	ASSERT(pIndex != 0);
-	ASSERT(NumEntries >= 0);
-
-	// Get the starting point in the file
-	int64_t filePos = rFrom.GetPosition();
-	
-	// Jump to the end of the file to read the index
-	rFrom.Seek(0 - ((NumEntries * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
-	
-	// Read block index header
-	file_BlockIndexHeader blkhdr;
-	if(!rFrom.ReadFullBuffer(&blkhdr, sizeof(blkhdr), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-	}
-	if(ntohl(blkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
-		|| (int64_t)box_ntoh64(blkhdr.mNumBlocks) != NumEntries)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// And then the block entries
-	for(int64_t b = 0; b < NumEntries; ++b)
-	{
-		// Read
-		file_BlockIndexEntry en;
-		if(!rFrom.ReadFullBuffer(&en, sizeof(en), 0))
-		{
-			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-		}
-		
-		// Add to list
-		pIndex[b].mFilePosition = filePos;
-
-		// Encoded size?
-		int64_t encodedSize = box_ntoh64(en.mEncodedSize);
-		// Check that the block is actually there
-		if(encodedSize <= 0)
-		{
-			THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
-		}
-
-		// Move file pointer on
-		filePos += encodedSize;
-	}
-	
-	// Store the position in the very last entry, so the size of the last entry can be calculated
-	pIndex[NumEntries].mFilePosition = filePos;
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static CopyData(IOStream &, IOStream &, int64_t, IOStream &, FromIndexEntry *, int64_t, IOStream &)
-//		Purpose: Static. Copy data from the Diff and From file to the out file.
-//				 rDiffData is at beginning of data.
-//				 rDiffIndex at any position.
-//				 rFrom is at any position.
-//				 rOut is after the header, ready for data
-//		Created: 16/1/04
-//
-// --------------------------------------------------------------------------
-static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks,
-	IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut)
-{
-	// Jump to the end of the diff file to read the index
-	rDiffIndex.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
-	
-	// Read block index header
-	file_BlockIndexHeader diffBlkhdr;
-	if(!rDiffIndex.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-	}
-	if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
-		|| (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// Record where the From file is
-	int64_t fromPos = rFrom.GetPosition();
-	
-	// Buffer data
-	void *buffer = 0;
-	int bufferSize = 0;
-	
-	try
-	{
-		// Read the blocks in!
-		for(int64_t b = 0; b < DiffNumBlocks; ++b)
-		{
-			// Read
-			file_BlockIndexEntry en;
-			if(!rDiffIndex.ReadFullBuffer(&en, sizeof(en), 0))
-			{
-				THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-			}
-			
-			// What's the size value stored in the entry
-			int64_t encodedSize = box_ntoh64(en.mEncodedSize);
-			
-			// How much data will be read?
-			int32_t blockSize = 0;
-			if(encodedSize > 0)
-			{
-				// The block is actually in the diff file
-				blockSize = encodedSize;
-			}
-			else
-			{
-				// It's in the from file. First, check to see if it's valid
-				int64_t blockIdx = (0 - encodedSize);
-				if(blockIdx > FromNumBlocks)
-				{
-					// References a block which doesn't actually exist
-					THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-				}
-				// Calculate size. This operation is safe because of the extra entry at the end
-				blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition;
-			}
-			ASSERT(blockSize > 0);
-			
-			// Make sure there's memory available to copy this
-			if(bufferSize < blockSize || buffer == 0)
-			{
-				// Free old block
-				if(buffer != 0)
-				{
-					::free(buffer);
-					buffer = 0;
-					bufferSize = 0;
-				}
-				// Allocate new block
-				buffer = ::malloc(blockSize);
-				if(buffer == 0)
-				{
-					throw std::bad_alloc();
-				}
-				bufferSize = blockSize;
-			}
-			ASSERT(bufferSize >= blockSize);
-			
-			// Load in data from one of the files
-			if(encodedSize > 0)
-			{
-				// Load from diff file
-				if(!rDiffData.ReadFullBuffer(buffer, blockSize, 0))
-				{
-					THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-				}				
-			}
-			else
-			{
-				// Locate and read the data from the from file
-				int64_t blockIdx = (0 - encodedSize);
-				// Seek if necessary
-				if(fromPos != pFromIndex[blockIdx].mFilePosition)
-				{
-					rFrom.Seek(pFromIndex[blockIdx].mFilePosition, IOStream::SeekType_Absolute);
-					fromPos = pFromIndex[blockIdx].mFilePosition;
-				}
-				// Read
-				if(!rFrom.ReadFullBuffer(buffer, blockSize, 0))
-				{
-					THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-				}
-				
-				// Update fromPos to current position
-				fromPos += blockSize;
-			}
-			
-			// Write data to out file
-			rOut.Write(buffer, blockSize);
-		}
-		
-		// Free buffer, if allocated
-		if(buffer != 0)
-		{
-			::free(buffer);
-			buffer = 0;
-		}
-	}
-	catch(...)
-	{
-		if(buffer != 0)
-		{
-			::free(buffer);
-			buffer = 0;
-		}
-		throw;
-	}
-}
-
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static WriteNewIndex(IOStream &, int64_t, FromIndexEntry *, int64_t, IOStream &)
-//		Purpose: Write the index to the out file, just copying from the diff file and
-//				 adjusting the entries.
-//		Created: 16/1/04
-//
-// --------------------------------------------------------------------------
-static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut)
-{
-	// Jump to the end of the diff file to read the index
-	rDiff.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
-	
-	// Read block index header
-	file_BlockIndexHeader diffBlkhdr;
-	if(!rDiff.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0))
-	{
-		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-	}
-	if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
-		|| (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// Write it out with a blanked out other file ID
-	diffBlkhdr.mOtherFileID = box_hton64(0);
-	rOut.Write(&diffBlkhdr, sizeof(diffBlkhdr));
-	
-	// Rewrite the index
-	for(int64_t b = 0; b < DiffNumBlocks; ++b)
-	{
-		file_BlockIndexEntry en;
-		if(!rDiff.ReadFullBuffer(&en, sizeof(en), 0))
-		{
-			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
-		}
-		
-		// What's the size value stored in the entry
-		int64_t encodedSize = box_ntoh64(en.mEncodedSize);
-		
-		// Need to adjust it?
-		if(encodedSize <= 0)
-		{
-			// This actually refers to a block in the from file. So rewrite this.
-			int64_t blockIdx = (0 - encodedSize);
-			if(blockIdx > FromNumBlocks)
-			{
-				// References a block which doesn't actually exist
-				THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-			}
-			// Calculate size. This operation is safe because of the extra entry at the end
-			int32_t blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition;
-			// Then replace entry
-			en.mEncodedSize = box_hton64(((uint64_t)blockSize));
-		}
-		
-		// Write entry
-		rOut.Write(&en, sizeof(en));
-	}
-}
-
-
-
-
-

Deleted: box/trunk/lib/backupclient/BackupStoreFileDiff.cpp
===================================================================
--- box/trunk/lib/backupclient/BackupStoreFileDiff.cpp	2011-05-24 13:33:50 UTC (rev 2962)
+++ box/trunk/lib/backupclient/BackupStoreFileDiff.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -1,1046 +0,0 @@
-// --------------------------------------------------------------------------
-//
-// File
-//		Name:    BackupStoreFileDiff.cpp
-//		Purpose: Functions relating to diffing BackupStoreFiles
-//		Created: 12/1/04
-//
-// --------------------------------------------------------------------------
-
-#include "Box.h"
-
-#include <string.h>
-
-#include <new>
-#include <map>
-
-#ifdef HAVE_TIME_H
-	#include <time.h>
-#elif HAVE_SYS_TIME_H
-	#include <sys/time.h>
-#endif
-
-#include "BackupStoreConstants.h"
-#include "BackupStoreException.h"
-#include "BackupStoreFile.h"
-#include "BackupStoreFileCryptVar.h"
-#include "BackupStoreFileEncodeStream.h"
-#include "BackupStoreFileWire.h"
-#include "BackupStoreObjectMagic.h"
-#include "CommonException.h"
-#include "FileStream.h"
-#include "MD5Digest.h"
-#include "RollingChecksum.h"
-#include "Timer.h"
-
-#include "MemLeakFindOn.h"
-
-#include <cstring>
-
-using namespace BackupStoreFileCryptVar;
-using namespace BackupStoreFileCreation;
-
-// By default, don't trace out details of the diff as we go along -- would fill up logs significantly.
-// But it's useful for the test.
-#ifndef BOX_RELEASE_BUILD
-	bool BackupStoreFile::TraceDetailsOfDiffProcess = false;
-#endif
-
-static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis);
-static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]);
-static void SearchForMatchingBlocks(IOStream &rFile, 
-	std::map<int64_t, int64_t> &rFoundBlocks, BlocksAvailableEntry *pIndex, 
-	int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES],
-	DiffTimer *pDiffTimer);
-static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable);
-static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings, int Offset, int32_t BlockSize, int64_t FileBlockNumber,
-BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks);
-static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex, int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile);
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &)
-//		Purpose: Move the file pointer in this stream to just before the block index.
-//				 Assumes that the stream is at the beginning, seekable, and
-//				 reading from the stream is OK.
-//		Created: 12/1/04
-//
-// --------------------------------------------------------------------------
-void BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &rStream)
-{
-	// Size of file
-	int64_t fileSize = rStream.BytesLeftToRead();
-
-	// Get header
-	file_StreamFormat hdr;
-
-	// Read the header
-	if(!rStream.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, IOStream::TimeOutInfinite))
-	{
-		// Couldn't read header
-		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-	}
-
-	// Check magic number
-	if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1
-#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
-		&& ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V0
-#endif
-		)
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// Work out where the index is
-	int64_t numBlocks = box_ntoh64(hdr.mNumBlocks);
-	int64_t blockHeaderPosFromEnd = ((numBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader));
-	
-	// Sanity check
-	if(blockHeaderPosFromEnd > static_cast<int64_t>(fileSize - sizeof(file_StreamFormat)))
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// Seek to that position
-	rStream.Seek(0 - blockHeaderPosFromEnd, IOStream::SeekType_End);
-	
-	// Done. Stream now in right position (as long as the file is formatted correctly)
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    BackupStoreFile::EncodeFileDiff(const char *, int64_t, const BackupStoreFilename &, int64_t, IOStream &, int64_t *)
-//		Purpose: Similar to EncodeFile, but takes the object ID of the file it's
-//				 diffing from, and the index of the blocks in a stream. It'll then
-//				 calculate which blocks can be reused from that old file.
-//				 The timeout is the timeout value for reading the diff block index.
-//				 If pIsCompletelyDifferent != 0, it will be set to true if the
-//				 the two files are completely different (do not share any block), false otherwise.
-//				 
-//		Created: 12/1/04
-//
-// --------------------------------------------------------------------------
-std::auto_ptr<IOStream> BackupStoreFile::EncodeFileDiff
-(
-	const char *Filename, int64_t ContainerID,
-	const BackupStoreFilename &rStoreFilename, int64_t DiffFromObjectID, 
-	IOStream &rDiffFromBlockIndex, int Timeout, DiffTimer *pDiffTimer, 
-	int64_t *pModificationTime, bool *pIsCompletelyDifferent)
-{
-	// Is it a symlink?
-	{
-		EMU_STRUCT_STAT st;
-		if(EMU_LSTAT(Filename, &st) != 0)
-		{
-			THROW_EXCEPTION(CommonException, OSFileError)
-		}
-		if((st.st_mode & S_IFLNK) == S_IFLNK)
-		{
-			// Don't do diffs for symlinks
-			if(pIsCompletelyDifferent != 0)
-			{
-				*pIsCompletelyDifferent = true;
-			}
-			return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime);
-		}
-	}
-
-	// Load in the blocks
-	BlocksAvailableEntry *pindex = 0;
-	int64_t blocksInIndex = 0;
-	bool canDiffFromThis = false;
-	LoadIndex(rDiffFromBlockIndex, DiffFromObjectID, &pindex, blocksInIndex, Timeout, canDiffFromThis);
-	// BOX_TRACE("Diff: Blocks in index: " << blocksInIndex);
-	
-	if(!canDiffFromThis)
-	{
-		// Don't do diffing...
-		if(pIsCompletelyDifferent != 0)
-		{
-			*pIsCompletelyDifferent = true;
-		}
-		return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime);
-	}
-	
-	// Pointer to recipe we're going to create
-	BackupStoreFileEncodeStream::Recipe *precipe = 0;
-	
-	try
-	{
-		// Find which sizes should be scanned
-		int32_t sizesToScan[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES];
-		FindMostUsedSizes(pindex, blocksInIndex, sizesToScan);
-		
-		// Flag for reporting to the user
-		bool completelyDifferent;
-			
-		// BLOCK
-		{
-			// Search the file to find matching blocks
-			std::map<int64_t, int64_t> foundBlocks; // map of offset in file to index in block index
-			int64_t sizeOfInputFile = 0;
-			// BLOCK
-			{
-				FileStream file(Filename);
-				// Get size of file
-				sizeOfInputFile = file.BytesLeftToRead();
-				// Find all those lovely matching blocks
-				SearchForMatchingBlocks(file, foundBlocks, pindex, 
-					blocksInIndex, sizesToScan, pDiffTimer);
-				
-				// Is it completely different?
-				completelyDifferent = (foundBlocks.size() == 0);
-			}
-			
-			// Create a recipe -- if the two files are completely different, don't put the from file ID in the recipe.
-			precipe = new BackupStoreFileEncodeStream::Recipe(pindex, blocksInIndex, completelyDifferent?(0):(DiffFromObjectID));
-			BlocksAvailableEntry *pindexKeptRef = pindex;	// we need this later, but must set pindex == 0 now, because of exceptions
-			pindex = 0;		// Recipe now has ownership
-			
-			// Fill it in
-			GenerateRecipe(*precipe, pindexKeptRef, blocksInIndex, foundBlocks, sizeOfInputFile);
-		}
-		// foundBlocks no longer required
-		
-		// Create the stream
-		std::auto_ptr<IOStream> stream(new BackupStoreFileEncodeStream);
-	
-		// Do the initial setup
-		((BackupStoreFileEncodeStream*)stream.get())->Setup(Filename, precipe, ContainerID, rStoreFilename, pModificationTime);
-		precipe = 0;	// Stream has taken ownership of this
-		
-		// Tell user about completely different status?
-		if(pIsCompletelyDifferent != 0)
-		{
-			*pIsCompletelyDifferent = completelyDifferent;
-		}
-	
-		// Return the stream for the caller
-		return stream;
-	}
-	catch(...)
-	{
-		// cleanup
-		if(pindex != 0)
-		{
-			::free(pindex);
-			pindex = 0;
-		}
-		if(precipe != 0)
-		{
-			delete precipe;
-			precipe = 0;
-		}
-		throw;
-	}
-}
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static LoadIndex(IOStream &, int64_t, BlocksAvailableEntry **, int64_t, bool &)
-//		Purpose: Read in an index, and decrypt, and store in the in memory block format.
-//				 rCanDiffFromThis is set to false if the version of the from file is too old.
-//		Created: 12/1/04
-//
-// --------------------------------------------------------------------------
-static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis)
-{
-	// Reset
-	rNumBlocksOut = 0;
-	rCanDiffFromThis = false;
-	
-	// Read header
-	file_BlockIndexHeader hdr;
-	if(!rBlockIndex.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, Timeout))
-	{
-		// Couldn't read header
-		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-	}
-
-#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
-	// Check against backwards comptaibility stuff
-	if(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V0))
-	{
-		// Won't diff against old version
-		
-		// Absorb rest of stream
-		char buffer[2048];
-		while(rBlockIndex.StreamDataLeft())
-		{
-			rBlockIndex.Read(buffer, sizeof(buffer), 1000 /* 1 sec timeout */);
-		}
-		
-		// Tell caller
-		rCanDiffFromThis = false;
-		return;
-	}
-#endif
-
-	// Check magic
-	if(hdr.mMagicValue != (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1))
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// Check that we're not trying to diff against a file which references blocks from another file
-	if(((int64_t)box_ntoh64(hdr.mOtherFileID)) != 0)
-	{
-		THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile)
-	}
-
-	// Mark as an acceptable diff.
-	rCanDiffFromThis = true;
-
-	// Get basic information
-	int64_t numBlocks = box_ntoh64(hdr.mNumBlocks);
-	uint64_t entryIVBase = box_ntoh64(hdr.mEntryIVBase);
-	
-	//TODO: Verify that these sizes look reasonable
-	
-	// Allocate space for the index
-	BlocksAvailableEntry *pindex = (BlocksAvailableEntry*)::malloc(sizeof(BlocksAvailableEntry) * numBlocks);
-	if(pindex == 0)
-	{
-		throw std::bad_alloc();
-	}
-	
-	try
-	{	
-		for(int64_t b = 0; b < numBlocks; ++b)
-		{
-			// Read an entry from the stream
-			file_BlockIndexEntry entry;
-			if(!rBlockIndex.ReadFullBuffer(&entry, sizeof(entry), 0 /* not interested in bytes read if this fails */, Timeout))
-			{
-				// Couldn't read entry
-				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
-			}	
-		
-			// Calculate IV for this entry
-			uint64_t iv = entryIVBase;
-			iv += b;
-			// Network byte order
-			iv = box_hton64(iv);
-			sBlowfishDecryptBlockEntry.SetIV(&iv);			
-			
-			// Decrypt the encrypted section
-			file_BlockIndexEntryEnc entryEnc;
-			int sectionSize = sBlowfishDecryptBlockEntry.TransformBlock(&entryEnc, sizeof(entryEnc),
-					entry.mEnEnc, sizeof(entry.mEnEnc));
-			if(sectionSize != sizeof(entryEnc))
-			{
-				THROW_EXCEPTION(BackupStoreException, BlockEntryEncodingDidntGiveExpectedLength)
-			}
-
-			// Check that we're not trying to diff against a file which references blocks from another file
-			if(((int64_t)box_ntoh64(entry.mEncodedSize)) <= 0)
-			{
-				THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile)
-			}
-			
-			// Store all the required information
-			pindex[b].mpNextInHashList = 0;	// hash list not set up yet
-			pindex[b].mSize = ntohl(entryEnc.mSize);
-			pindex[b].mWeakChecksum = ntohl(entryEnc.mWeakChecksum);
-			::memcpy(pindex[b].mStrongChecksum, entryEnc.mStrongChecksum, sizeof(pindex[b].mStrongChecksum));
-		}
-		
-		// Store index pointer for called
-		ASSERT(ppIndex != 0);
-		*ppIndex = pindex;
-
-		// Store number of blocks for caller
-		rNumBlocksOut = numBlocks;	
-
-	}
-	catch(...)
-	{
-		// clean up and send the exception along its way
-		::free(pindex);
-		throw;
-	}
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static FindMostUsedSizes(BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
-//		Purpose: Finds the most commonly used block sizes in the index
-//		Created: 12/1/04
-//
-// --------------------------------------------------------------------------
-static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
-{
-	// Array for lengths
-	int64_t sizeCounts[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES];
-
-	// Set arrays to lots of zeros (= unused entries)
-	for(int l = 0; l < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++l)
-	{
-		Sizes[l] = 0;
-		sizeCounts[l] = 0;
-	}
-
-	// Array for collecting sizes
-	std::map<int32_t, int64_t> foundSizes;
-	
-	// Run through blocks and make a count of the entries
-	for(int64_t b = 0; b < NumBlocks; ++b)
-	{
-		// Only if the block size is bigger than the minimum size we'll scan for
-		if(pIndex[b].mSize > BACKUP_FILE_DIFF_MIN_BLOCK_SIZE)
-		{
-			// Find entry?
-			std::map<int32_t, int64_t>::const_iterator f(foundSizes.find(pIndex[b].mSize));
-			if(f != foundSizes.end())
-			{
-				// Increment existing entry
-				foundSizes[pIndex[b].mSize] = foundSizes[pIndex[b].mSize] + 1;
-			}
-			else
-			{
-				// New entry
-				foundSizes[pIndex[b].mSize] = 1;
-			}
-		}
-	}
-	
-	// Make the block sizes
-	for(std::map<int32_t, int64_t>::const_iterator i(foundSizes.begin()); i != foundSizes.end(); ++i)
-	{
-		// Find the position of the size in the array
-		for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t)
-		{
-			// Instead of sorting on the raw count of blocks,
-			// take the file area covered by this block size.
-			if(i->second * i->first > sizeCounts[t] * Sizes[t])
-			{
-				// Then this size belong before this entry -- shuffle them up
-				for(int s = (BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1); s >= t; --s)
-				{
-					Sizes[s] = Sizes[s-1];
-					sizeCounts[s] = sizeCounts[s-1];
-				}
-				
-				// Insert this size
-				Sizes[t] = i->first;
-				sizeCounts[t] = i->second;
-				
-				// Shouldn't do any more searching
-				break;
-			}
-		}
-	}
-	
-	// trace the size table in debug builds
-#ifndef BOX_RELEASE_BUILD
-	if(BackupStoreFile::TraceDetailsOfDiffProcess)
-	{
-		for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t)
-		{
-			BOX_TRACE("Diff block size " << t << ": " <<
-				Sizes[t] << " (count = " << 
-				sizeCounts[t] << ")");
-		}
-	}
-#endif
-}
-
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static SearchForMatchingBlocks(IOStream &, std::map<int64_t, int64_t> &, BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
-//		Purpose: Find the matching blocks within the file.
-//		Created: 12/1/04
-//
-// --------------------------------------------------------------------------
-static void SearchForMatchingBlocks(IOStream &rFile, std::map<int64_t, int64_t> &rFoundBlocks,
-	BlocksAvailableEntry *pIndex, int64_t NumBlocks, 
-	int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES], DiffTimer *pDiffTimer)
-{
-	Timer maximumDiffingTime(0, "MaximumDiffingTime");
-
-	if(pDiffTimer && pDiffTimer->IsManaged())
-	{
-		maximumDiffingTime = Timer(pDiffTimer->GetMaximumDiffingTime(),
-			"MaximumDiffingTime");
-	}
-	
-	std::map<int64_t, int32_t> goodnessOfFit;
-
-	// Allocate the hash lookup table
-	BlocksAvailableEntry **phashTable = (BlocksAvailableEntry **)::malloc(sizeof(BlocksAvailableEntry *) * (64*1024));
-
-	// Choose a size for the buffer, just a little bit more than the maximum block size
-	int32_t bufSize = Sizes[0];
-	for(int z = 1; z < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++z)
-	{
-		if(Sizes[z] > bufSize) bufSize = Sizes[z];
-	}
-	bufSize += 4;
-	ASSERT(bufSize > Sizes[0]);
-	ASSERT(bufSize > 0);
-	if(bufSize > (BACKUP_FILE_MAX_BLOCK_SIZE + 1024))
-	{
-		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
-	}
-	
-	// TODO: Because we read in the file a scanned block size at a time,
-	// it is likely to be inefficient. Probably will be much better to
-	// calculate checksums for all block sizes in a single pass.
-
-	// Allocate the buffers.
-	uint8_t *pbuffer0 = (uint8_t *)::malloc(bufSize);
-	uint8_t *pbuffer1 = (uint8_t *)::malloc(bufSize);
-	try
-	{
-		// Check buffer allocation
-		if(pbuffer0 == 0 || pbuffer1 == 0 || phashTable == 0)
-		{
-			// If a buffer got allocated, it will be cleaned up in the catch block
-			throw std::bad_alloc();
-		}
-		
-		// Flag to abort the run, if too many blocks are found -- avoid using
-		// huge amounts of processor time when files contain many similar blocks.
-		bool abortSearch = false;
-		
-		// Search for each block size in turn
-		// NOTE: Do the smallest size first, so that the scheme for adding
-		// entries in the found list works as expected and replaces smallers block
-		// with larger blocks when it finds matches at the same offset in the file.
-		for(int s = BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1; s >= 0; --s)
-		{
-			ASSERT(Sizes[s] <= bufSize);
-			BOX_TRACE("Diff pass " << s << ", for block size " <<
-				Sizes[s]);
-			
-			// Check we haven't finished
-			if(Sizes[s] == 0)
-			{
-				// empty entry, try next size
-				continue;
-			}
-			
-			// Set up the hash table entries
-			SetupHashTable(pIndex, NumBlocks, Sizes[s], phashTable);
-		
-			// Shift file position to beginning
-			rFile.Seek(0, IOStream::SeekType_Absolute);
-			
-			// Read first block
-			if(rFile.Read(pbuffer0, Sizes[s]) != Sizes[s])
-			{
-				// Size of file too short to match -- do next size
-				continue;
-			}
-			
-			// Setup block pointers
-			uint8_t *beginnings = pbuffer0;
-			uint8_t *endings = pbuffer1;
-			int offset = 0;
-			
-			// Calculate the first checksum, ready for rolling
-			RollingChecksum rolling(beginnings, Sizes[s]);
-			
-			// Then roll, until the file is exhausted
-			int64_t fileBlockNumber = 0;
-			int64_t fileOffset = 0;
-			int rollOverInitialBytes = 0;
-			while(true)
-			{
-				if(maximumDiffingTime.HasExpired())
-				{
-					ASSERT(pDiffTimer != NULL);
-					BOX_INFO("MaximumDiffingTime reached - "
-						"suspending file diff");
-					abortSearch = true;
-					break;
-				}
-				
-				if(pDiffTimer)
-				{
-					pDiffTimer->DoKeepAlive();
-				}
-				
-				// Load in another block of data, and record how big it is
-				int bytesInEndings = rFile.Read(endings, Sizes[s]);
-				int tmp;
-
-				// Skip any bytes from a previous matched block
-				if(rollOverInitialBytes > 0 && offset < bytesInEndings)
-				{
-					int spaceLeft = bytesInEndings - offset;
-					int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes;
-
-					rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
-
-					offset += thisRoll;
-					fileOffset += thisRoll;
-					rollOverInitialBytes -= thisRoll;
-
-					if(rollOverInitialBytes)
-					{
-						goto refresh;
-					}
-				}
-
-				if(goodnessOfFit.count(fileOffset))
-				{
-					tmp = goodnessOfFit[fileOffset];
-				}
-				else
-				{
-					tmp = 0;
-				}
-
-				if(tmp >= Sizes[s])
-				{
-					// Skip over bigger ready-matched blocks completely
-					rollOverInitialBytes = tmp;
-					int spaceLeft = bytesInEndings - offset;
-					int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes;
-
-					rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
-
-					offset += thisRoll;
-					fileOffset += thisRoll;
-					rollOverInitialBytes -= thisRoll;
-
-					if(rollOverInitialBytes)
-					{
-						goto refresh;
-					}
-				}
-
-				while(offset < bytesInEndings)
-				{
-					// Is current checksum in hash list?
-					uint16_t hash = rolling.GetComponentForHashing();
-					if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s]))
-					{
-						if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks))
-						{
-							BOX_TRACE("Found block match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset);
-							goodnessOfFit[fileOffset] = Sizes[s];
-
-							// Block matched, roll the checksum forward to the next block without doing
-							// any more comparisons, because these are pointless (as any more matches will be ignored when
-							// the recipe is generated) and just take up valuable processor time. Edge cases are
-							// especially nasty, using huge amounts of time and memory.
-							int skip = Sizes[s];
-							if(offset < bytesInEndings && skip > 0)
-							{
-								int spaceLeft = bytesInEndings - offset;
-								int thisRoll = (skip > spaceLeft) ? spaceLeft : skip;
-
-								rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
-
-								offset += thisRoll;
-								fileOffset += thisRoll;
-								skip -= thisRoll;
-							}
-							// Not all the bytes necessary will have been skipped, so get them
-							// skipped after the next block is loaded.
-							rollOverInitialBytes = skip;
-							
-							// End this loop, so the final byte isn't used again
-							break;
-						}
-						else
-						{
-							BOX_TRACE("False alarm match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset);
-						}
-
-						int64_t NumBlocksFound = static_cast<int64_t>(
-							rFoundBlocks.size());
-						int64_t MaxBlocksFound = NumBlocks * 
-							BACKUP_FILE_DIFF_MAX_BLOCK_FIND_MULTIPLE;
-						
-						if(NumBlocksFound > MaxBlocksFound)
-						{
-							abortSearch = true;
-							break;
-						}
-					}
-					
-					// Roll checksum forward
-					rolling.RollForward(beginnings[offset], endings[offset], Sizes[s]);
-				
-					// Increment offsets
-					++offset;
-					++fileOffset;
-				}
-				
-				if(abortSearch) break;
-				
-			refresh:
-				// Finished?
-				if(bytesInEndings != Sizes[s])
-				{
-					// No more data in file -- check the final block
-					// (Do a copy and paste of 5 lines of code instead of introducing a comparison for
-					// each byte of the file)
-					uint16_t hash = rolling.GetComponentForHashing();
-					if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s]))
-					{
-						if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks))
-						{
-							goodnessOfFit[fileOffset] = Sizes[s];
-						}
-					}
-
-					// finish
-					break;
-				}
-				
-				// Switch buffers, reset offset
-				beginnings = endings;
-				endings = (beginnings == pbuffer0)?(pbuffer1):(pbuffer0);	// ie the other buffer
-				offset = 0;
-
-				// And count the blocks which have been done
-				++fileBlockNumber;
-			}
-
-			if(abortSearch) break;
-		}
-		
-		// Free buffers and hash table
-		::free(pbuffer1);
-		pbuffer1 = 0;
-		::free(pbuffer0);
-		pbuffer0 = 0;
-		::free(phashTable);
-		phashTable = 0;
-	}
-	catch(...)
-	{
-		// Cleanup and throw
-		if(pbuffer1 != 0) ::free(pbuffer1);
-		if(pbuffer0 != 0) ::free(pbuffer0);
-		if(phashTable != 0) ::free(phashTable);
-		throw;
-	}
-	
-#ifndef BOX_RELEASE_BUILD
-	if(BackupStoreFile::TraceDetailsOfDiffProcess)
-	{
-		// Trace out the found blocks in debug mode
-		BOX_TRACE("Diff: list of found blocks");
-		BOX_TRACE("======== ======== ======== ========");
-		BOX_TRACE("  Offset   BlkIdx     Size Movement");
-		for(std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin()); i != rFoundBlocks.end(); ++i)
-		{
-			int64_t orgLoc = 0;
-			for(int64_t b = 0; b < i->second; ++b)
-			{
-				orgLoc += pIndex[b].mSize;
-			}
-			BOX_TRACE(std::setw(8) << i->first << " " <<
-				std::setw(8) << i->second << " " <<
-				std::setw(8) << pIndex[i->second].mSize << 
-				" " << 
-				std::setw(8) << (i->first - orgLoc));
-		}
-		BOX_TRACE("======== ======== ======== ========");
-	}
-#endif
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static SetupHashTable(BlocksAvailableEntry *, int64_t, in32_t, BlocksAvailableEntry **)
-//		Purpose: Set up the hash table ready for a scan
-//		Created: 14/1/04
-//
-// --------------------------------------------------------------------------
-static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable)
-{
-	// Set all entries in the hash table to zero
-	::memset(pHashTable, 0, (sizeof(BlocksAvailableEntry *) * (64*1024)));
-
-	// Scan through the blocks, building the hash table
-	for(int64_t b = 0; b < NumBlocks; ++b)
-	{
-		// Only look at the required block size
-		if(pIndex[b].mSize == BlockSize)
-		{
-			// Get the value under which to hash this entry
-			uint16_t hash = RollingChecksum::ExtractHashingComponent(pIndex[b].mWeakChecksum);
-			
-			// Already present in table?
-			if(pHashTable[hash] != 0)
-			{
-				//BOX_TRACE("Another hash entry for " << hash << " found");
-				// Yes -- need to set the pointer in this entry to the current entry to build the linked list
-				pIndex[b].mpNextInHashList = pHashTable[hash];
-			}
-
-			// Put a pointer to this entry in the hash table
-			pHashTable[hash] = pIndex + b;
-		}
-	}
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static bool SecondStageMatch(xxx)
-//		Purpose: When a match in the hash table is found, scan for second stage match using strong checksum.
-//		Created: 14/1/04
-//
-// --------------------------------------------------------------------------
-static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings,
-	int Offset, int32_t BlockSize, int64_t FileBlockNumber, BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks)
-{
-	// Check parameters
-	ASSERT(pBeginnings != 0);
-	ASSERT(pEndings != 0);
-	ASSERT(Offset >= 0);
-	ASSERT(BlockSize > 0);
-	ASSERT(pFirstInHashList != 0);
-	ASSERT(pIndex != 0);
-
-#ifndef BOX_RELEASE_BUILD
-	uint16_t DEBUG_Hash = fastSum.GetComponentForHashing();
-#endif
-	uint32_t Checksum = fastSum.GetChecksum();
-
-	// Before we go to the expense of the MD5, make sure it's a darn good match on the checksum we already know.
-	BlocksAvailableEntry *scan = pFirstInHashList;
-	bool found=false;
-	while(scan != 0)
-	{
-		if(scan->mWeakChecksum == Checksum)
-		{
-			found = true;
-			break;
-		}
-		scan = scan->mpNextInHashList;
-	}
-	if(!found)
-	{
-		return false;
-	}
-
-	// Calculate the strong MD5 digest for this block
-	MD5Digest strong;
-	// Add the data from the beginnings
-	strong.Add(pBeginnings + Offset, BlockSize - Offset);
-	// Add any data from the endings
-	if(Offset > 0)
-	{
-		strong.Add(pEndings, Offset);
-	}
-	strong.Finish();
-	
-	// Then go through the entries in the hash list, comparing with the strong digest calculated
-	scan = pFirstInHashList;
-	//BOX_TRACE("second stage match");
-	while(scan != 0)
-	{
-		//BOX_TRACE("scan size " << scan->mSize <<
-		//	", block size " << BlockSize <<
-		//	", hash " << Hash);
-		ASSERT(scan->mSize == BlockSize);
-		ASSERT(RollingChecksum::ExtractHashingComponent(scan->mWeakChecksum) == DEBUG_Hash);
-	
-		// Compare?
-		if(strong.DigestMatches(scan->mStrongChecksum))
-		{
-			//BOX_TRACE("Match!\n");
-			// Found! Add to list of found blocks...
-			int64_t fileOffset = (FileBlockNumber * BlockSize) + Offset;
-			int64_t blockIndex = (scan - pIndex);	// pointer arthmitic is frowned upon. But most efficient way of doing it here -- alternative is to use more memory
-			
-			// We do NOT search for smallest blocks first, as this code originally assumed.
-			// To prevent this from potentially overwriting a better match, the caller must determine
-			// the relative "goodness" of any existing match and this one, and avoid the call if it
-			// could be detrimental.
-			rFoundBlocks[fileOffset] = blockIndex;
-			
-			// No point in searching further, report success
-			return true;
-		}
-	
-		// Next
-		scan = scan->mpNextInHashList;
-	}
-	
-	// Not matched
-	return false;
-}
-
-
-// --------------------------------------------------------------------------
-//
-// Function
-//		Name:    static GenerateRecipe(BackupStoreFileEncodeStream::Recipe &, BlocksAvailableEntry *, int64_t, std::map<int64_t, int64_t> &)
-//		Purpose: Fills in the recipe from the found block list
-//		Created: 15/1/04
-//
-// --------------------------------------------------------------------------
-static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex,
-		int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile)
-{
-	// NOTE: This function could be a lot more sophisiticated. For example, if
-	// a small block overlaps a big block like this
-	//   ****
-	//      *******************************
-	// then the small block will be used, not the big one. But it'd be better to
-	// just ignore the small block and keep the big one. However, some stats should
-	// be gathered about real world files before writing complex code which might
-	// go wrong.
-
-	// Initialise a blank instruction
-	BackupStoreFileEncodeStream::RecipeInstruction instruction;
-	#define RESET_INSTRUCTION			\
-	instruction.mSpaceBefore = 0;		\
-	instruction.mBlocks = 0;			\
-	instruction.mpStartBlock = 0;
-	RESET_INSTRUCTION
-
-	// First, a special case for when there are no found blocks
-	if(rFoundBlocks.size() == 0)
-	{
-		// No blocks, just a load of space
-		instruction.mSpaceBefore = SizeOfInputFile;
-		rRecipe.push_back(instruction);	
-	
-		#ifndef BOX_RELEASE_BUILD
-		if(BackupStoreFile::TraceDetailsOfDiffProcess)
-		{
-			BOX_TRACE("Diff: Default recipe generated, " << 
-				SizeOfInputFile << " bytes of file");
-		}
-		#endif
-		
-		// Don't do anything
-		return;
-	}
-
-	// Current location
-	int64_t loc = 0;
-
-	// Then iterate through the list, generating the recipe
-	std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin());
-	ASSERT(i != rFoundBlocks.end());	// check logic
-
-	// Counting for debug tracing
-#ifndef BOX_RELEASE_BUILD
-	int64_t debug_NewBytesFound = 0;
-	int64_t debug_OldBlocksUsed = 0;
-#endif
-	
-	for(; i != rFoundBlocks.end(); ++i)
-	{
-		// Remember... map is (position in file) -> (index of block in pIndex)
-		
-		if(i->first < loc)
-		{
-			// This block overlaps the last one
-			continue;
-		}
-		else if(i->first > loc)
-		{
-			// There's a gap between the end of the last thing and this block.
-			// If there's an instruction waiting, push it onto the list
-			if(instruction.mSpaceBefore != 0 || instruction.mpStartBlock != 0)
-			{
-				rRecipe.push_back(instruction);
-			}
-			// Start a new instruction, with the gap ready
-			RESET_INSTRUCTION
-			instruction.mSpaceBefore = i->first - loc;
-			// Move location forward to match
-			loc += instruction.mSpaceBefore;
-#ifndef BOX_RELEASE_BUILD
-			debug_NewBytesFound += instruction.mSpaceBefore;
-#endif
-		}
-		
-		// First, does the current instruction need pushing back, because this block is not
-		// sequential to the last one?
-		if(instruction.mpStartBlock != 0 && (pIndex + i->second) != (instruction.mpStartBlock + instruction.mBlocks))
-		{
-			rRecipe.push_back(instruction);
-			RESET_INSTRUCTION
-		}
-		
-		// Add in this block
-		if(instruction.mpStartBlock == 0)
-		{
-			// This block starts a new instruction
-			instruction.mpStartBlock = pIndex + i->second;
-			instruction.mBlocks = 1;
-		}
-		else
-		{
-			// It continues the previous section of blocks
-			instruction.mBlocks += 1;
-		}
-
-#ifndef BOX_RELEASE_BUILD
-		debug_OldBlocksUsed++;
-#endif
-
-		// Move location forward
-		loc += pIndex[i->second].mSize;
-	}
-	
-	// Push the last instruction generated
-	rRecipe.push_back(instruction);
-	
-	// Is there any space left at the end which needs sending?
-	if(loc != SizeOfInputFile)
-	{
-		RESET_INSTRUCTION
-		instruction.mSpaceBefore = SizeOfInputFile - loc;
-#ifndef BOX_RELEASE_BUILD
-		debug_NewBytesFound += instruction.mSpaceBefore;
-#endif
-		rRecipe.push_back(instruction);		
-	}
-	
-	// dump out the recipe
-#ifndef BOX_RELEASE_BUILD
-	BOX_TRACE("Diff: " << 
-		debug_NewBytesFound << " new bytes found, " <<
-		debug_OldBlocksUsed << " old blocks used");
-	if(BackupStoreFile::TraceDetailsOfDiffProcess)
-	{
-		BOX_TRACE("Diff: Recipe generated (size " << rRecipe.size());
-		BOX_TRACE("======== ========= ========");
-		BOX_TRACE("Space b4 FirstBlk  NumBlks");
-		{
-			for(unsigned int e = 0; e < rRecipe.size(); ++e)
-			{
-				char b[64];
-#ifdef WIN32
-				sprintf(b, "%8I64d", (int64_t)(rRecipe[e].mpStartBlock - pIndex));
-#else
-				sprintf(b, "%8lld", (int64_t)(rRecipe[e].mpStartBlock - pIndex));
-#endif
-				BOX_TRACE(std::setw(8) <<
-					rRecipe[e].mSpaceBefore <<
-					" " <<
-					((rRecipe[e].mpStartBlock == 0)?"       -":b) <<
-					" " << std::setw(8) <<
-					rRecipe[e].mBlocks);
-			}
-		}
-		BOX_TRACE("======== ========= ========");
-	}
-#endif
-}

Copied: box/trunk/lib/backupstore/BackupStoreFileCmbDiff.cpp (from rev 2961, box/trunk/lib/backupclient/BackupStoreFileCmbDiff.cpp)
===================================================================
--- box/trunk/lib/backupstore/BackupStoreFileCmbDiff.cpp	                        (rev 0)
+++ box/trunk/lib/backupstore/BackupStoreFileCmbDiff.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -0,0 +1,326 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileCmbDiff.cpp
+//		Purpose: Combine two diffs together
+//		Created: 12/7/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <new>
+#include <stdlib.h>
+
+#include "BackupStoreFile.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreException.h"
+#include "BackupStoreConstants.h"
+#include "BackupStoreFilename.h"
+
+#include "MemLeakFindOn.h"
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::CombineDiffs(IOStream &, IOStream &, IOStream &rOut)
+//		Purpose: Given two diffs, combine them into a single diff, to produce a diff
+//				 which, combined with the original file, creates the result of applying
+//				 rDiff, then rDiff2. Two opens of rDiff2 are required
+//		Created: 12/7/04
+//
+// --------------------------------------------------------------------------
+void BackupStoreFile::CombineDiffs(IOStream &rDiff1, IOStream &rDiff2, IOStream &rDiff2b, IOStream &rOut)
+{
+	// Skip header of first diff, record where the data starts, and skip to the index
+	int64_t diff1DataStarts = 0;
+	{
+		// Read the header for the From file
+		file_StreamFormat diff1Hdr;
+		if(!rDiff1.ReadFullBuffer(&diff1Hdr, sizeof(diff1Hdr), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		if(ntohl(diff1Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+		{
+			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+		}
+		// Skip over the filename and attributes of the From file
+		// BLOCK
+		{
+			BackupStoreFilename filename2;
+			filename2.ReadFromStream(rDiff1, IOStream::TimeOutInfinite);
+			int32_t size_s;
+			if(!rDiff1.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */))
+			{
+				THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead)
+			}
+			int size = ntohl(size_s);
+			// Skip forward the size
+			rDiff1.Seek(size, IOStream::SeekType_Relative);		
+		}
+		// Record position
+		diff1DataStarts = rDiff1.GetPosition();
+		// Skip to index
+		rDiff1.Seek(0 - (((box_ntoh64(diff1Hdr.mNumBlocks)) * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	}
+
+	// Read the index of the first diff
+	// Header first
+	file_BlockIndexHeader diff1IdxHdr;
+	if(!rDiff1.ReadFullBuffer(&diff1IdxHdr, sizeof(diff1IdxHdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+	if(ntohl(diff1IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	int64_t diff1NumBlocks = box_ntoh64(diff1IdxHdr.mNumBlocks);
+	// Allocate some memory
+	int64_t *diff1BlockStartPositions = (int64_t*)::malloc((diff1NumBlocks + 1) * sizeof(int64_t));
+	if(diff1BlockStartPositions == 0)
+	{
+		throw std::bad_alloc();
+	}
+
+	// Buffer data
+	void *buffer = 0;
+	int bufferSize = 0;	
+	
+	try
+	{
+		// Then the entries:
+		// For each entry, want to know if it's in the file, and if so, how big it is.
+		// We'll store this as an array of file positions in the file, with an additioal
+		// entry on the end so that we can work out the length of the last block.
+		// If an entry isn't in the file, then store 0 - (position in other file).
+		int64_t diff1Position = diff1DataStarts;
+		for(int64_t b = 0; b < diff1NumBlocks; ++b)
+		{
+			file_BlockIndexEntry e;
+			if(!rDiff1.ReadFullBuffer(&e, sizeof(e), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}
+	
+			// Where's the block?
+			int64_t blockEn = box_ntoh64(e.mEncodedSize);
+			if(blockEn <= 0)
+			{
+				// Just store the negated block number
+				diff1BlockStartPositions[b] = blockEn;
+			}
+			else
+			{
+				// Block is present in this file
+				diff1BlockStartPositions[b] = diff1Position;
+				diff1Position += blockEn;
+			}
+		}
+		
+		// Finish off the list, so the last entry can have it's size calcuated.
+		diff1BlockStartPositions[diff1NumBlocks] = diff1Position;
+
+		// Now read the second diff's header, copying it to the out file
+		file_StreamFormat diff2Hdr;
+		if(!rDiff2.ReadFullBuffer(&diff2Hdr, sizeof(diff2Hdr), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		if(ntohl(diff2Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+		{
+			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+		}
+		// Copy
+		rOut.Write(&diff2Hdr, sizeof(diff2Hdr));
+		// Copy over filename and attributes
+		// BLOCK
+		{
+			BackupStoreFilename filename;
+			filename.ReadFromStream(rDiff2, IOStream::TimeOutInfinite);
+			filename.WriteToStream(rOut);
+			StreamableMemBlock attr;
+			attr.ReadFromStream(rDiff2, IOStream::TimeOutInfinite);
+			attr.WriteToStream(rOut);
+		}
+		
+		// Get to the index of rDiff2b, and read the header
+		MoveStreamPositionToBlockIndex(rDiff2b);
+		file_BlockIndexHeader diff2IdxHdr;
+		if(!rDiff2b.ReadFullBuffer(&diff2IdxHdr, sizeof(diff2IdxHdr), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+		}
+		if(ntohl(diff2IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+		{
+			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+		}
+		int64_t diff2NumBlocks = box_ntoh64(diff2IdxHdr.mNumBlocks);
+		int64_t diff2IndexEntriesStart = rDiff2b.GetPosition();
+		
+		// Then read all the entries
+		int64_t diff2FilePosition = rDiff2.GetPosition();
+		for(int64_t b = 0; b < diff2NumBlocks; ++b)
+		{
+			file_BlockIndexEntry e;
+			if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}
+
+			// What do to next about copying data
+			bool copyBlock = false;
+			int copySize = 0;
+			int64_t copyFrom = 0;
+			bool fromFileDiff1 = false;
+	
+			// Where's the block?
+			int64_t blockEn = box_ntoh64(e.mEncodedSize);
+			if(blockEn > 0)
+			{
+				// Block is present in this file -- copy to out
+				copyBlock = true;
+				copyFrom = diff2FilePosition;
+				copySize = (int)blockEn;
+				
+				// Move pointer onwards
+				diff2FilePosition += blockEn;
+			}
+			else
+			{
+				// Block isn't present here -- is it present in the old one?
+				int64_t blockIndex = 0 - blockEn;
+				if(blockIndex < 0 || blockIndex > diff1NumBlocks)
+				{
+					THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+				}
+				if(diff1BlockStartPositions[blockIndex] > 0)
+				{
+					// Block is in the old diff file, copy it across
+					copyBlock = true;
+					copyFrom = diff1BlockStartPositions[blockIndex];
+					int nb = blockIndex + 1;
+					while(diff1BlockStartPositions[nb] <= 0)
+					{
+						// This is safe, because the last entry will terminate it properly!
+						++nb;
+						ASSERT(nb <= diff1NumBlocks);
+					}
+					copySize = diff1BlockStartPositions[nb] - copyFrom;
+					fromFileDiff1 = true;
+				}
+			}
+			//TRACE4("%d %d %lld %d\n", copyBlock, copySize, copyFrom, fromFileDiff1);
+						
+			// Copy data to the output file?
+			if(copyBlock)
+			{
+				// Allocate enough space
+				if(bufferSize < copySize || buffer == 0)
+				{
+					// Free old block
+					if(buffer != 0)
+					{
+						::free(buffer);
+						buffer = 0;
+						bufferSize = 0;
+					}
+					// Allocate new block
+					buffer = ::malloc(copySize);
+					if(buffer == 0)
+					{
+						throw std::bad_alloc();
+					}
+					bufferSize = copySize;
+				}
+				ASSERT(bufferSize >= copySize);
+				
+				// Load in the data
+				if(fromFileDiff1)
+				{
+					rDiff1.Seek(copyFrom, IOStream::SeekType_Absolute);
+					if(!rDiff1.ReadFullBuffer(buffer, copySize, 0))
+					{
+						THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+					}
+				}
+				else
+				{
+					rDiff2.Seek(copyFrom, IOStream::SeekType_Absolute);
+					if(!rDiff2.ReadFullBuffer(buffer, copySize, 0))
+					{
+						THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+					}
+				}
+				// Write out data
+				rOut.Write(buffer, copySize);
+			}
+		}
+		
+		// Write the modified header
+		diff2IdxHdr.mOtherFileID = diff1IdxHdr.mOtherFileID;
+		rOut.Write(&diff2IdxHdr, sizeof(diff2IdxHdr));
+		
+		// Then we'll write out the index, reading the data again
+		rDiff2b.Seek(diff2IndexEntriesStart, IOStream::SeekType_Absolute);
+		for(int64_t b = 0; b < diff2NumBlocks; ++b)
+		{
+			file_BlockIndexEntry e;
+			if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}
+	
+			// Where's the block?
+			int64_t blockEn = box_ntoh64(e.mEncodedSize);
+		
+			// If it's not in this file, it needs modification...
+			if(blockEn <= 0)
+			{
+				int64_t blockIndex = 0 - blockEn;
+				// In another file. Need to translate this against the other diff
+				if(diff1BlockStartPositions[blockIndex] > 0)
+				{
+					// Block is in the first diff file, stick in size
+					int nb = blockIndex + 1;
+					while(diff1BlockStartPositions[nb] <= 0)
+					{
+						// This is safe, because the last entry will terminate it properly!
+						++nb;
+						ASSERT(nb <= diff1NumBlocks);
+					}
+					int64_t size = diff1BlockStartPositions[nb] - diff1BlockStartPositions[blockIndex];
+					e.mEncodedSize = box_hton64(size);
+				}
+				else
+				{
+					// Block in the original file, use translated value
+					e.mEncodedSize = box_hton64(diff1BlockStartPositions[blockIndex]);
+				}
+			}
+			
+			// Write entry
+			rOut.Write(&e, sizeof(e));
+		}
+	}
+	catch(...)
+	{
+		// clean up
+		::free(diff1BlockStartPositions);
+		if(buffer != 0)
+		{
+			::free(buffer);
+		}
+		throw;
+	}
+	
+	// Clean up allocated memory
+	::free(diff1BlockStartPositions);
+	if(buffer != 0)
+	{
+		::free(buffer);
+	}
+}
+

Copied: box/trunk/lib/backupstore/BackupStoreFileCmbIdx.cpp (from rev 2961, box/trunk/lib/backupclient/BackupStoreFileCmbIdx.cpp)
===================================================================
--- box/trunk/lib/backupstore/BackupStoreFileCmbIdx.cpp	                        (rev 0)
+++ box/trunk/lib/backupstore/BackupStoreFileCmbIdx.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -0,0 +1,324 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileCmbIdx.cpp
+//		Purpose: Combine indicies of a delta file and the file it's a diff from.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <new>
+#include <string.h>
+
+#include "BackupStoreFile.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreException.h"
+#include "BackupStoreConstants.h"
+#include "BackupStoreFilename.h"
+
+#include "MemLeakFindOn.h"
+
+// Hide from outside world
+namespace
+{
+
+class BSFCombinedIndexStream : public IOStream
+{
+public:
+	BSFCombinedIndexStream(IOStream *pDiff);
+	~BSFCombinedIndexStream();
+	
+	virtual int Read(void *pBuffer, int NBytes, int Timeout = IOStream::TimeOutInfinite);
+	virtual void Write(const void *pBuffer, int NBytes);
+	virtual bool StreamDataLeft();
+	virtual bool StreamClosed();
+	virtual void Initialise(IOStream &rFrom);
+	
+private:
+	IOStream *mpDiff;
+	bool mIsInitialised;
+	bool mHeaderWritten;
+	file_BlockIndexHeader mHeader;
+	int64_t mNumEntriesToGo;
+	int64_t mNumEntriesInFromFile;
+	int64_t *mFromBlockSizes;		// NOTE: Entries in network byte order
+};
+
+};
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::CombineFileIndices(IOStream &, IOStream &, bool)
+//		Purpose: Given a diff file and the file it's a diff from, return a stream from which
+//				 can be read the index of the combined file, without actually combining them.
+//				 The stream of the diff must have a lifetime greater than or equal to the
+//				 lifetime of the returned stream object. The full "from" file stream
+//				 only needs to exist during the actual function call.
+//				 If you pass in dodgy files which aren't related, then you will either
+//				 get an error or bad results. So don't do that.
+//				 If DiffIsIndexOnly is true, then rDiff is assumed to be a stream positioned
+//				 at the beginning of the block index. Similarly for FromIsIndexOnly.
+//				 WARNING: Reads of the returned streams with buffer sizes less than 64 bytes
+//				 will not return any data.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+std::auto_ptr<IOStream> BackupStoreFile::CombineFileIndices(IOStream &rDiff, IOStream &rFrom, bool DiffIsIndexOnly, bool FromIsIndexOnly)
+{
+	// Reposition file pointers?
+	if(!DiffIsIndexOnly)
+	{
+		MoveStreamPositionToBlockIndex(rDiff);
+	}
+	if(!FromIsIndexOnly)
+	{
+		MoveStreamPositionToBlockIndex(rFrom);
+	}
+
+	// Create object
+	std::auto_ptr<IOStream> stream(new BSFCombinedIndexStream(&rDiff));
+
+	// Initialise it
+	((BSFCombinedIndexStream *)stream.get())->Initialise(rFrom);
+
+	// And return the stream
+	return stream;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::BSFCombinedIndexStream()
+//		Purpose: Private class. Constructor.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+BSFCombinedIndexStream::BSFCombinedIndexStream(IOStream *pDiff)
+	: mpDiff(pDiff),
+	  mIsInitialised(false),
+	  mHeaderWritten(false),
+	  mNumEntriesToGo(0),
+	  mNumEntriesInFromFile(0),
+	  mFromBlockSizes(0)
+{
+	ASSERT(mpDiff != 0);
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::~BSFCombinedIndexStream()
+//		Purpose: Private class. Destructor.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+BSFCombinedIndexStream::~BSFCombinedIndexStream()
+{
+	if(mFromBlockSizes != 0)
+	{
+		::free(mFromBlockSizes);
+		mFromBlockSizes = 0;
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::Initialise(IOStream &)
+//		Purpose: Private class. Initalise from the streams (diff passed in constructor).
+//				 Both streams must have file pointer positioned at the block index.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+void BSFCombinedIndexStream::Initialise(IOStream &rFrom)
+{
+	// Paranoia is good.
+	if(mIsInitialised)
+	{
+		THROW_EXCEPTION(BackupStoreException, Internal)
+	}
+	
+	// Look at the diff file: Read in the header
+	if(!mpDiff->ReadFullBuffer(&mHeader, sizeof(mHeader), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+	if(ntohl(mHeader.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Read relevant data.
+	mNumEntriesToGo = box_ntoh64(mHeader.mNumBlocks);
+	
+	// Adjust a bit to reflect the fact it's no longer a diff
+	mHeader.mOtherFileID = box_hton64(0);
+	
+	// Now look at the from file: Read header
+	file_BlockIndexHeader fromHdr;
+	if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+	if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Then... allocate memory for the list of sizes
+	mNumEntriesInFromFile = box_ntoh64(fromHdr.mNumBlocks);
+	mFromBlockSizes = (int64_t*)::malloc(mNumEntriesInFromFile * sizeof(int64_t));
+	if(mFromBlockSizes == 0)
+	{
+		throw std::bad_alloc();
+	}
+	
+	// And read them all in!
+	for(int64_t b = 0; b < mNumEntriesInFromFile; ++b)
+	{
+		file_BlockIndexEntry e;
+		if(!rFrom.ReadFullBuffer(&e, sizeof(e), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+		}
+		
+		// Check that the from file isn't a delta in itself
+		if(box_ntoh64(e.mEncodedSize) <= 0)
+		{
+			THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
+		}
+
+		// Store size (in network byte order)
+		mFromBlockSizes[b] = e.mEncodedSize;
+	}
+	
+	// Flag as initialised
+	mIsInitialised = true;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::Read(void *, int, int)
+//		Purpose: Private class. As interface.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+int BSFCombinedIndexStream::Read(void *pBuffer, int NBytes, int Timeout)
+{
+	// Paranoia is good.
+	if(!mIsInitialised || mFromBlockSizes == 0 || mpDiff == 0)
+	{
+		THROW_EXCEPTION(BackupStoreException, Internal)
+	}
+	
+	int written = 0;
+	
+	// Header output yet?
+	if(!mHeaderWritten)
+	{
+		// Enough space?
+		if(NBytes < (int)sizeof(mHeader)) return 0;
+		
+		// Copy in
+		::memcpy(pBuffer, &mHeader, sizeof(mHeader));
+		NBytes -= sizeof(mHeader);
+		written += sizeof(mHeader);
+	
+		// Flag it's done
+		mHeaderWritten = true;
+	}
+
+	// How many entries can be written?
+	int entriesToWrite = NBytes / sizeof(file_BlockIndexEntry);
+	if(entriesToWrite > mNumEntriesToGo)
+	{
+		entriesToWrite = mNumEntriesToGo;
+	}
+	
+	// Setup ready to go
+	file_BlockIndexEntry *poutput = (file_BlockIndexEntry*)(((uint8_t*)pBuffer) + written);
+
+	// Write entries
+	for(int b = 0; b < entriesToWrite; ++b)
+	{
+		if(!mpDiff->ReadFullBuffer(&(poutput[b]), sizeof(file_BlockIndexEntry), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+		}
+		
+		// Does this need adjusting?
+		int s = box_ntoh64(poutput[b].mEncodedSize);
+		if(s <= 0)
+		{
+			// A reference to a block in the from file
+			int block = 0 - s;
+			ASSERT(block >= 0);
+			if(block >= mNumEntriesInFromFile)
+			{
+				// That's not good, the block doesn't exist
+				THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
+			}
+			
+			// Adjust the entry in the buffer
+			poutput[b].mEncodedSize = mFromBlockSizes[block];	// stored in network byte order, no translation necessary
+		}
+	}
+	
+	// Update written count
+	written += entriesToWrite * sizeof(file_BlockIndexEntry);
+	mNumEntriesToGo -= entriesToWrite;
+	
+	return written;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::Write(const void *, int)
+//		Purpose: Private class. As interface.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+void BSFCombinedIndexStream::Write(const void *pBuffer, int NBytes)
+{
+	THROW_EXCEPTION(BackupStoreException, StreamDoesntHaveRequiredFeatures)
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::StreamDataLeft()
+//		Purpose: Private class. As interface
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+bool BSFCombinedIndexStream::StreamDataLeft()
+{
+	return (!mHeaderWritten) || (mNumEntriesToGo > 0);
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::StreamClosed()
+//		Purpose: Private class. As interface.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+bool BSFCombinedIndexStream::StreamClosed()
+{
+	return true;	// doesn't do writing
+}
+

Copied: box/trunk/lib/backupstore/BackupStoreFileCombine.cpp (from rev 2961, box/trunk/lib/backupclient/BackupStoreFileCombine.cpp)
===================================================================
--- box/trunk/lib/backupstore/BackupStoreFileCombine.cpp	                        (rev 0)
+++ box/trunk/lib/backupstore/BackupStoreFileCombine.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -0,0 +1,410 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileCombine.cpp
+//		Purpose: File combining for BackupStoreFile
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <new>
+
+#include "BackupStoreFile.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreException.h"
+#include "BackupStoreConstants.h"
+#include "BackupStoreFilename.h"
+#include "FileStream.h"
+
+#include "MemLeakFindOn.h"
+
+typedef struct
+{
+	int64_t mFilePosition;
+} FromIndexEntry;
+
+static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries);
+static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks, IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut);
+static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut);
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::CombineFile(IOStream &, IOStream &, IOStream &)
+//		Purpose: Where rDiff is a store file which is incomplete as a result of a
+//				 diffing operation, rFrom is the file it is diffed from, and 
+//				 rOut is the stream in which to place the result, the old file
+//				 and new file are combined into a file containing all the data.
+//				 rDiff2 is the same file as rDiff, opened again to get two
+//				 independent streams to the same file.
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+void BackupStoreFile::CombineFile(IOStream &rDiff, IOStream &rDiff2, IOStream &rFrom, IOStream &rOut)
+{
+	// Read and copy the header.
+	file_StreamFormat hdr;
+	if(!rDiff.ReadFullBuffer(&hdr, sizeof(hdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	// Copy
+	rOut.Write(&hdr, sizeof(hdr));
+	// Copy over filename and attributes
+	// BLOCK
+	{
+		BackupStoreFilename filename;
+		filename.ReadFromStream(rDiff, IOStream::TimeOutInfinite);
+		filename.WriteToStream(rOut);
+		StreamableMemBlock attr;
+		attr.ReadFromStream(rDiff, IOStream::TimeOutInfinite);
+		attr.WriteToStream(rOut);
+	}
+	
+	// Read the header for the From file
+	file_StreamFormat fromHdr;
+	if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	// Skip over the filename and attributes of the From file
+	// BLOCK
+	{
+		BackupStoreFilename filename2;
+		filename2.ReadFromStream(rFrom, IOStream::TimeOutInfinite);
+		int32_t size_s;
+		if(!rFrom.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */))
+		{
+			THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead)
+		}
+		int size = ntohl(size_s);
+		// Skip forward the size
+		rFrom.Seek(size, IOStream::SeekType_Relative);		
+	}
+	
+	// Allocate memory for the block index of the From file
+	int64_t fromNumBlocks = box_ntoh64(fromHdr.mNumBlocks);
+	// NOTE: An extra entry is required so that the length of the last block can be calculated
+	FromIndexEntry *pFromIndex = (FromIndexEntry*)::malloc((fromNumBlocks+1) * sizeof(FromIndexEntry));
+	if(pFromIndex == 0)
+	{
+		throw std::bad_alloc();
+	}
+	
+	try
+	{
+		// Load the index from the From file, calculating the offsets in the
+		// file as we go along, and enforce that everything should be present.
+		LoadFromIndex(rFrom, pFromIndex, fromNumBlocks);
+		
+		// Read in the block index of the Diff file in small chunks, and output data
+		// for each block, either from this file, or the other file.
+		int64_t diffNumBlocks = box_ntoh64(hdr.mNumBlocks);
+		CopyData(rDiff /* positioned at start of data */, rDiff2, diffNumBlocks, rFrom, pFromIndex, fromNumBlocks, rOut);
+		
+		// Read in the block index again, and output the new block index, simply
+		// filling in the sizes of blocks from the old file.
+		WriteNewIndex(rDiff, diffNumBlocks, pFromIndex, fromNumBlocks, rOut);
+		
+		// Free buffers
+		::free(pFromIndex);
+		pFromIndex = 0;
+	}
+	catch(...)
+	{
+		// Clean up
+		if(pFromIndex != 0)
+		{
+			::free(pFromIndex);
+			pFromIndex = 0;
+		}	
+		throw;
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static LoadFromIndex(IOStream &, FromIndexEntry *, int64_t)
+//		Purpose: Static. Load the index from the From file
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries)
+{
+	ASSERT(pIndex != 0);
+	ASSERT(NumEntries >= 0);
+
+	// Get the starting point in the file
+	int64_t filePos = rFrom.GetPosition();
+	
+	// Jump to the end of the file to read the index
+	rFrom.Seek(0 - ((NumEntries * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	
+	// Read block index header
+	file_BlockIndexHeader blkhdr;
+	if(!rFrom.ReadFullBuffer(&blkhdr, sizeof(blkhdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(blkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
+		|| (int64_t)box_ntoh64(blkhdr.mNumBlocks) != NumEntries)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// And then the block entries
+	for(int64_t b = 0; b < NumEntries; ++b)
+	{
+		// Read
+		file_BlockIndexEntry en;
+		if(!rFrom.ReadFullBuffer(&en, sizeof(en), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		
+		// Add to list
+		pIndex[b].mFilePosition = filePos;
+
+		// Encoded size?
+		int64_t encodedSize = box_ntoh64(en.mEncodedSize);
+		// Check that the block is actually there
+		if(encodedSize <= 0)
+		{
+			THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
+		}
+
+		// Move file pointer on
+		filePos += encodedSize;
+	}
+	
+	// Store the position in the very last entry, so the size of the last entry can be calculated
+	pIndex[NumEntries].mFilePosition = filePos;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static CopyData(IOStream &, IOStream &, int64_t, IOStream &, FromIndexEntry *, int64_t, IOStream &)
+//		Purpose: Static. Copy data from the Diff and From file to the out file.
+//				 rDiffData is at beginning of data.
+//				 rDiffIndex at any position.
+//				 rFrom is at any position.
+//				 rOut is after the header, ready for data
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks,
+	IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut)
+{
+	// Jump to the end of the diff file to read the index
+	rDiffIndex.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	
+	// Read block index header
+	file_BlockIndexHeader diffBlkhdr;
+	if(!rDiffIndex.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
+		|| (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Record where the From file is
+	int64_t fromPos = rFrom.GetPosition();
+	
+	// Buffer data
+	void *buffer = 0;
+	int bufferSize = 0;
+	
+	try
+	{
+		// Read the blocks in!
+		for(int64_t b = 0; b < DiffNumBlocks; ++b)
+		{
+			// Read
+			file_BlockIndexEntry en;
+			if(!rDiffIndex.ReadFullBuffer(&en, sizeof(en), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+			}
+			
+			// What's the size value stored in the entry
+			int64_t encodedSize = box_ntoh64(en.mEncodedSize);
+			
+			// How much data will be read?
+			int32_t blockSize = 0;
+			if(encodedSize > 0)
+			{
+				// The block is actually in the diff file
+				blockSize = encodedSize;
+			}
+			else
+			{
+				// It's in the from file. First, check to see if it's valid
+				int64_t blockIdx = (0 - encodedSize);
+				if(blockIdx > FromNumBlocks)
+				{
+					// References a block which doesn't actually exist
+					THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+				}
+				// Calculate size. This operation is safe because of the extra entry at the end
+				blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition;
+			}
+			ASSERT(blockSize > 0);
+			
+			// Make sure there's memory available to copy this
+			if(bufferSize < blockSize || buffer == 0)
+			{
+				// Free old block
+				if(buffer != 0)
+				{
+					::free(buffer);
+					buffer = 0;
+					bufferSize = 0;
+				}
+				// Allocate new block
+				buffer = ::malloc(blockSize);
+				if(buffer == 0)
+				{
+					throw std::bad_alloc();
+				}
+				bufferSize = blockSize;
+			}
+			ASSERT(bufferSize >= blockSize);
+			
+			// Load in data from one of the files
+			if(encodedSize > 0)
+			{
+				// Load from diff file
+				if(!rDiffData.ReadFullBuffer(buffer, blockSize, 0))
+				{
+					THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+				}				
+			}
+			else
+			{
+				// Locate and read the data from the from file
+				int64_t blockIdx = (0 - encodedSize);
+				// Seek if necessary
+				if(fromPos != pFromIndex[blockIdx].mFilePosition)
+				{
+					rFrom.Seek(pFromIndex[blockIdx].mFilePosition, IOStream::SeekType_Absolute);
+					fromPos = pFromIndex[blockIdx].mFilePosition;
+				}
+				// Read
+				if(!rFrom.ReadFullBuffer(buffer, blockSize, 0))
+				{
+					THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+				}
+				
+				// Update fromPos to current position
+				fromPos += blockSize;
+			}
+			
+			// Write data to out file
+			rOut.Write(buffer, blockSize);
+		}
+		
+		// Free buffer, if allocated
+		if(buffer != 0)
+		{
+			::free(buffer);
+			buffer = 0;
+		}
+	}
+	catch(...)
+	{
+		if(buffer != 0)
+		{
+			::free(buffer);
+			buffer = 0;
+		}
+		throw;
+	}
+}
+
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static WriteNewIndex(IOStream &, int64_t, FromIndexEntry *, int64_t, IOStream &)
+//		Purpose: Write the index to the out file, just copying from the diff file and
+//				 adjusting the entries.
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut)
+{
+	// Jump to the end of the diff file to read the index
+	rDiff.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	
+	// Read block index header
+	file_BlockIndexHeader diffBlkhdr;
+	if(!rDiff.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
+		|| (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Write it out with a blanked out other file ID
+	diffBlkhdr.mOtherFileID = box_hton64(0);
+	rOut.Write(&diffBlkhdr, sizeof(diffBlkhdr));
+	
+	// Rewrite the index
+	for(int64_t b = 0; b < DiffNumBlocks; ++b)
+	{
+		file_BlockIndexEntry en;
+		if(!rDiff.ReadFullBuffer(&en, sizeof(en), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		
+		// What's the size value stored in the entry
+		int64_t encodedSize = box_ntoh64(en.mEncodedSize);
+		
+		// Need to adjust it?
+		if(encodedSize <= 0)
+		{
+			// This actually refers to a block in the from file. So rewrite this.
+			int64_t blockIdx = (0 - encodedSize);
+			if(blockIdx > FromNumBlocks)
+			{
+				// References a block which doesn't actually exist
+				THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+			}
+			// Calculate size. This operation is safe because of the extra entry at the end
+			int32_t blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition;
+			// Then replace entry
+			en.mEncodedSize = box_hton64(((uint64_t)blockSize));
+		}
+		
+		// Write entry
+		rOut.Write(&en, sizeof(en));
+	}
+}
+
+
+
+
+

Copied: box/trunk/lib/backupstore/BackupStoreFileDiff.cpp (from rev 2961, box/trunk/lib/backupclient/BackupStoreFileDiff.cpp)
===================================================================
--- box/trunk/lib/backupstore/BackupStoreFileDiff.cpp	                        (rev 0)
+++ box/trunk/lib/backupstore/BackupStoreFileDiff.cpp	2011-05-24 15:08:59 UTC (rev 2963)
@@ -0,0 +1,1046 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileDiff.cpp
+//		Purpose: Functions relating to diffing BackupStoreFiles
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <string.h>
+
+#include <new>
+#include <map>
+
+#ifdef HAVE_TIME_H
+	#include <time.h>
+#elif HAVE_SYS_TIME_H
+	#include <sys/time.h>
+#endif
+
+#include "BackupStoreConstants.h"
+#include "BackupStoreException.h"
+#include "BackupStoreFile.h"
+#include "BackupStoreFileCryptVar.h"
+#include "BackupStoreFileEncodeStream.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "CommonException.h"
+#include "FileStream.h"
+#include "MD5Digest.h"
+#include "RollingChecksum.h"
+#include "Timer.h"
+
+#include "MemLeakFindOn.h"
+
+#include <cstring>
+
+using namespace BackupStoreFileCryptVar;
+using namespace BackupStoreFileCreation;
+
+// By default, don't trace out details of the diff as we go along -- would fill up logs significantly.
+// But it's useful for the test.
+#ifndef BOX_RELEASE_BUILD
+	bool BackupStoreFile::TraceDetailsOfDiffProcess = false;
+#endif
+
+static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis);
+static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]);
+static void SearchForMatchingBlocks(IOStream &rFile, 
+	std::map<int64_t, int64_t> &rFoundBlocks, BlocksAvailableEntry *pIndex, 
+	int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES],
+	DiffTimer *pDiffTimer);
+static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable);
+static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings, int Offset, int32_t BlockSize, int64_t FileBlockNumber,
+BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks);
+static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex, int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile);
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &)
+//		Purpose: Move the file pointer in this stream to just before the block index.
+//				 Assumes that the stream is at the beginning, seekable, and
+//				 reading from the stream is OK.
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+void BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &rStream)
+{
+	// Size of file
+	int64_t fileSize = rStream.BytesLeftToRead();
+
+	// Get header
+	file_StreamFormat hdr;
+
+	// Read the header
+	if(!rStream.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, IOStream::TimeOutInfinite))
+	{
+		// Couldn't read header
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+
+	// Check magic number
+	if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1
+#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
+		&& ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V0
+#endif
+		)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Work out where the index is
+	int64_t numBlocks = box_ntoh64(hdr.mNumBlocks);
+	int64_t blockHeaderPosFromEnd = ((numBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader));
+	
+	// Sanity check
+	if(blockHeaderPosFromEnd > static_cast<int64_t>(fileSize - sizeof(file_StreamFormat)))
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Seek to that position
+	rStream.Seek(0 - blockHeaderPosFromEnd, IOStream::SeekType_End);
+	
+	// Done. Stream now in right position (as long as the file is formatted correctly)
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::EncodeFileDiff(const char *, int64_t, const BackupStoreFilename &, int64_t, IOStream &, int64_t *)
+//		Purpose: Similar to EncodeFile, but takes the object ID of the file it's
+//				 diffing from, and the index of the blocks in a stream. It'll then
+//				 calculate which blocks can be reused from that old file.
+//				 The timeout is the timeout value for reading the diff block index.
+//				 If pIsCompletelyDifferent != 0, it will be set to true if the
+//				 the two files are completely different (do not share any block), false otherwise.
+//				 
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+std::auto_ptr<IOStream> BackupStoreFile::EncodeFileDiff
+(
+	const char *Filename, int64_t ContainerID,
+	const BackupStoreFilename &rStoreFilename, int64_t DiffFromObjectID, 
+	IOStream &rDiffFromBlockIndex, int Timeout, DiffTimer *pDiffTimer, 
+	int64_t *pModificationTime, bool *pIsCompletelyDifferent)
+{
+	// Is it a symlink?
+	{
+		EMU_STRUCT_STAT st;
+		if(EMU_LSTAT(Filename, &st) != 0)
+		{
+			THROW_EXCEPTION(CommonException, OSFileError)
+		}
+		if((st.st_mode & S_IFLNK) == S_IFLNK)
+		{
+			// Don't do diffs for symlinks
+			if(pIsCompletelyDifferent != 0)
+			{
+				*pIsCompletelyDifferent = true;
+			}
+			return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime);
+		}
+	}
+
+	// Load in the blocks
+	BlocksAvailableEntry *pindex = 0;
+	int64_t blocksInIndex = 0;
+	bool canDiffFromThis = false;
+	LoadIndex(rDiffFromBlockIndex, DiffFromObjectID, &pindex, blocksInIndex, Timeout, canDiffFromThis);
+	// BOX_TRACE("Diff: Blocks in index: " << blocksInIndex);
+	
+	if(!canDiffFromThis)
+	{
+		// Don't do diffing...
+		if(pIsCompletelyDifferent != 0)
+		{
+			*pIsCompletelyDifferent = true;
+		}
+		return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime);
+	}
+	
+	// Pointer to recipe we're going to create
+	BackupStoreFileEncodeStream::Recipe *precipe = 0;
+	
+	try
+	{
+		// Find which sizes should be scanned
+		int32_t sizesToScan[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES];
+		FindMostUsedSizes(pindex, blocksInIndex, sizesToScan);
+		
+		// Flag for reporting to the user
+		bool completelyDifferent;
+			
+		// BLOCK
+		{
+			// Search the file to find matching blocks
+			std::map<int64_t, int64_t> foundBlocks; // map of offset in file to index in block index
+			int64_t sizeOfInputFile = 0;
+			// BLOCK
+			{
+				FileStream file(Filename);
+				// Get size of file
+				sizeOfInputFile = file.BytesLeftToRead();
+				// Find all those lovely matching blocks
+				SearchForMatchingBlocks(file, foundBlocks, pindex, 
+					blocksInIndex, sizesToScan, pDiffTimer);
+				
+				// Is it completely different?
+				completelyDifferent = (foundBlocks.size() == 0);
+			}
+			
+			// Create a recipe -- if the two files are completely different, don't put the from file ID in the recipe.
+			precipe = new BackupStoreFileEncodeStream::Recipe(pindex, blocksInIndex, completelyDifferent?(0):(DiffFromObjectID));
+			BlocksAvailableEntry *pindexKeptRef = pindex;	// we need this later, but must set pindex == 0 now, because of exceptions
+			pindex = 0;		// Recipe now has ownership
+			
+			// Fill it in
+			GenerateRecipe(*precipe, pindexKeptRef, blocksInIndex, foundBlocks, sizeOfInputFile);
+		}
+		// foundBlocks no longer required
+		
+		// Create the stream
+		std::auto_ptr<IOStream> stream(new BackupStoreFileEncodeStream);
+	
+		// Do the initial setup
+		((BackupStoreFileEncodeStream*)stream.get())->Setup(Filename, precipe, ContainerID, rStoreFilename, pModificationTime);
+		precipe = 0;	// Stream has taken ownership of this
+		
+		// Tell user about completely different status?
+		if(pIsCompletelyDifferent != 0)
+		{
+			*pIsCompletelyDifferent = completelyDifferent;
+		}
+	
+		// Return the stream for the caller
+		return stream;
+	}
+	catch(...)
+	{
+		// cleanup
+		if(pindex != 0)
+		{
+			::free(pindex);
+			pindex = 0;
+		}
+		if(precipe != 0)
+		{
+			delete precipe;
+			precipe = 0;
+		}
+		throw;
+	}
+}
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static LoadIndex(IOStream &, int64_t, BlocksAvailableEntry **, int64_t, bool &)
+//		Purpose: Read in an index, and decrypt, and store in the in memory block format.
+//				 rCanDiffFromThis is set to false if the version of the from file is too old.
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis)
+{
+	// Reset
+	rNumBlocksOut = 0;
+	rCanDiffFromThis = false;
+	
+	// Read header
+	file_BlockIndexHeader hdr;
+	if(!rBlockIndex.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, Timeout))
+	{
+		// Couldn't read header
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+
+#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
+	// Check against backwards comptaibility stuff
+	if(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V0))
+	{
+		// Won't diff against old version
+		
+		// Absorb rest of stream
+		char buffer[2048];
+		while(rBlockIndex.StreamDataLeft())
+		{
+			rBlockIndex.Read(buffer, sizeof(buffer), 1000 /* 1 sec timeout */);
+		}
+		
+		// Tell caller
+		rCanDiffFromThis = false;
+		return;
+	}
+#endif
+
+	// Check magic
+	if(hdr.mMagicValue != (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1))
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Check that we're not trying to diff against a file which references blocks from another file
+	if(((int64_t)box_ntoh64(hdr.mOtherFileID)) != 0)
+	{
+		THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile)
+	}
+
+	// Mark as an acceptable diff.
+	rCanDiffFromThis = true;
+
+	// Get basic information
+	int64_t numBlocks = box_ntoh64(hdr.mNumBlocks);
+	uint64_t entryIVBase = box_ntoh64(hdr.mEntryIVBase);
+	
+	//TODO: Verify that these sizes look reasonable
+	
+	// Allocate space for the index
+	BlocksAvailableEntry *pindex = (BlocksAvailableEntry*)::malloc(sizeof(BlocksAvailableEntry) * numBlocks);
+	if(pindex == 0)
+	{
+		throw std::bad_alloc();
+	}
+	
+	try
+	{	
+		for(int64_t b = 0; b < numBlocks; ++b)
+		{
+			// Read an entry from the stream
+			file_BlockIndexEntry entry;
+			if(!rBlockIndex.ReadFullBuffer(&entry, sizeof(entry), 0 /* not interested in bytes read if this fails */, Timeout))
+			{
+				// Couldn't read entry
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}	
+		
+			// Calculate IV for this entry
+			uint64_t iv = entryIVBase;
+			iv += b;
+			// Network byte order
+			iv = box_hton64(iv);
+			sBlowfishDecryptBlockEntry.SetIV(&iv);			
+			
+			// Decrypt the encrypted section
+			file_BlockIndexEntryEnc entryEnc;
+			int sectionSize = sBlowfishDecryptBlockEntry.TransformBlock(&entryEnc, sizeof(entryEnc),
+					entry.mEnEnc, sizeof(entry.mEnEnc));
+			if(sectionSize != sizeof(entryEnc))
+			{
+				THROW_EXCEPTION(BackupStoreException, BlockEntryEncodingDidntGiveExpectedLength)
+			}
+
+			// Check that we're not trying to diff against a file which references blocks from another file
+			if(((int64_t)box_ntoh64(entry.mEncodedSize)) <= 0)
+			{
+				THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile)
+			}
+			
+			// Store all the required information
+			pindex[b].mpNextInHashList = 0;	// hash list not set up yet
+			pindex[b].mSize = ntohl(entryEnc.mSize);
+			pindex[b].mWeakChecksum = ntohl(entryEnc.mWeakChecksum);
+			::memcpy(pindex[b].mStrongChecksum, entryEnc.mStrongChecksum, sizeof(pindex[b].mStrongChecksum));
+		}
+		
+		// Store index pointer for called
+		ASSERT(ppIndex != 0);
+		*ppIndex = pindex;
+
+		// Store number of blocks for caller
+		rNumBlocksOut = numBlocks;	
+
+	}
+	catch(...)
+	{
+		// clean up and send the exception along its way
+		::free(pindex);
+		throw;
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static FindMostUsedSizes(BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
+//		Purpose: Finds the most commonly used block sizes in the index
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
+{
+	// Array for lengths
+	int64_t sizeCounts[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES];
+
+	// Set arrays to lots of zeros (= unused entries)
+	for(int l = 0; l < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++l)
+	{
+		Sizes[l] = 0;
+		sizeCounts[l] = 0;
+	}
+
+	// Array for collecting sizes
+	std::map<int32_t, int64_t> foundSizes;
+	
+	// Run through blocks and make a count of the entries
+	for(int64_t b = 0; b < NumBlocks; ++b)
+	{
+		// Only if the block size is bigger than the minimum size we'll scan for
+		if(pIndex[b].mSize > BACKUP_FILE_DIFF_MIN_BLOCK_SIZE)
+		{
+			// Find entry?
+			std::map<int32_t, int64_t>::const_iterator f(foundSizes.find(pIndex[b].mSize));
+			if(f != foundSizes.end())
+			{
+				// Increment existing entry
+				foundSizes[pIndex[b].mSize] = foundSizes[pIndex[b].mSize] + 1;
+			}
+			else
+			{
+				// New entry
+				foundSizes[pIndex[b].mSize] = 1;
+			}
+		}
+	}
+	
+	// Make the block sizes
+	for(std::map<int32_t, int64_t>::const_iterator i(foundSizes.begin()); i != foundSizes.end(); ++i)
+	{
+		// Find the position of the size in the array
+		for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t)
+		{
+			// Instead of sorting on the raw count of blocks,
+			// take the file area covered by this block size.
+			if(i->second * i->first > sizeCounts[t] * Sizes[t])
+			{
+				// Then this size belong before this entry -- shuffle them up
+				for(int s = (BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1); s >= t; --s)
+				{
+					Sizes[s] = Sizes[s-1];
+					sizeCounts[s] = sizeCounts[s-1];
+				}
+				
+				// Insert this size
+				Sizes[t] = i->first;
+				sizeCounts[t] = i->second;
+				
+				// Shouldn't do any more searching
+				break;
+			}
+		}
+	}
+	
+	// trace the size table in debug builds
+#ifndef BOX_RELEASE_BUILD
+	if(BackupStoreFile::TraceDetailsOfDiffProcess)
+	{
+		for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t)
+		{
+			BOX_TRACE("Diff block size " << t << ": " <<
+				Sizes[t] << " (count = " << 
+				sizeCounts[t] << ")");
+		}
+	}
+#endif
+}
+
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static SearchForMatchingBlocks(IOStream &, std::map<int64_t, int64_t> &, BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
+//		Purpose: Find the matching blocks within the file.
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+static void SearchForMatchingBlocks(IOStream &rFile, std::map<int64_t, int64_t> &rFoundBlocks,
+	BlocksAvailableEntry *pIndex, int64_t NumBlocks, 
+	int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES], DiffTimer *pDiffTimer)
+{
+	Timer maximumDiffingTime(0, "MaximumDiffingTime");
+
+	if(pDiffTimer && pDiffTimer->IsManaged())
+	{
+		maximumDiffingTime = Timer(pDiffTimer->GetMaximumDiffingTime(),
+			"MaximumDiffingTime");
+	}
+	
+	std::map<int64_t, int32_t> goodnessOfFit;
+
+	// Allocate the hash lookup table
+	BlocksAvailableEntry **phashTable = (BlocksAvailableEntry **)::malloc(sizeof(BlocksAvailableEntry *) * (64*1024));
+
+	// Choose a size for the buffer, just a little bit more than the maximum block size
+	int32_t bufSize = Sizes[0];
+	for(int z = 1; z < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++z)
+	{
+		if(Sizes[z] > bufSize) bufSize = Sizes[z];
+	}
+	bufSize += 4;
+	ASSERT(bufSize > Sizes[0]);
+	ASSERT(bufSize > 0);
+	if(bufSize > (BACKUP_FILE_MAX_BLOCK_SIZE + 1024))
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// TODO: Because we read in the file a scanned block size at a time,
+	// it is likely to be inefficient. Probably will be much better to
+	// calculate checksums for all block sizes in a single pass.
+
+	// Allocate the buffers.
+	uint8_t *pbuffer0 = (uint8_t *)::malloc(bufSize);
+	uint8_t *pbuffer1 = (uint8_t *)::malloc(bufSize);
+	try
+	{
+		// Check buffer allocation
+		if(pbuffer0 == 0 || pbuffer1 == 0 || phashTable == 0)
+		{
+			// If a buffer got allocated, it will be cleaned up in the catch block
+			throw std::bad_alloc();
+		}
+		
+		// Flag to abort the run, if too many blocks are found -- avoid using
+		// huge amounts of processor time when files contain many similar blocks.
+		bool abortSearch = false;
+		
+		// Search for each block size in turn
+		// NOTE: Do the smallest size first, so that the scheme for adding
+		// entries in the found list works as expected and replaces smallers block
+		// with larger blocks when it finds matches at the same offset in the file.
+		for(int s = BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1; s >= 0; --s)
+		{
+			ASSERT(Sizes[s] <= bufSize);
+			BOX_TRACE("Diff pass " << s << ", for block size " <<
+				Sizes[s]);
+			
+			// Check we haven't finished
+			if(Sizes[s] == 0)
+			{
+				// empty entry, try next size
+				continue;
+			}
+			
+			// Set up the hash table entries
+			SetupHashTable(pIndex, NumBlocks, Sizes[s], phashTable);
+		
+			// Shift file position to beginning
+			rFile.Seek(0, IOStream::SeekType_Absolute);
+			
+			// Read first block
+			if(rFile.Read(pbuffer0, Sizes[s]) != Sizes[s])
+			{
+				// Size of file too short to match -- do next size
+				continue;
+			}
+			
+			// Setup block pointers
+			uint8_t *beginnings = pbuffer0;
+			uint8_t *endings = pbuffer1;
+			int offset = 0;
+			
+			// Calculate the first checksum, ready for rolling
+			RollingChecksum rolling(beginnings, Sizes[s]);
+			
+			// Then roll, until the file is exhausted
+			int64_t fileBlockNumber = 0;
+			int64_t fileOffset = 0;
+			int rollOverInitialBytes = 0;
+			while(true)
+			{
+				if(maximumDiffingTime.HasExpired())
+				{
+					ASSERT(pDiffTimer != NULL);
+					BOX_INFO("MaximumDiffingTime reached - "
+						"suspending file diff");
+					abortSearch = true;
+					break;
+				}
+				
+				if(pDiffTimer)
+				{
+					pDiffTimer->DoKeepAlive();
+				}
+				
+				// Load in another block of data, and record how big it is
+				int bytesInEndings = rFile.Read(endings, Sizes[s]);
+				int tmp;
+
+				// Skip any bytes from a previous matched block
+				if(rollOverInitialBytes > 0 && offset < bytesInEndings)
+				{
+					int spaceLeft = bytesInEndings - offset;
+					int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes;
+
+					rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
+
+					offset += thisRoll;
+					fileOffset += thisRoll;
+					rollOverInitialBytes -= thisRoll;
+
+					if(rollOverInitialBytes)
+					{
+						goto refresh;
+					}
+				}
+
+				if(goodnessOfFit.count(fileOffset))
+				{
+					tmp = goodnessOfFit[fileOffset];
+				}
+				else
+				{
+					tmp = 0;
+				}
+
+				if(tmp >= Sizes[s])
+				{
+					// Skip over bigger ready-matched blocks completely
+					rollOverInitialBytes = tmp;
+					int spaceLeft = bytesInEndings - offset;
+					int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes;
+
+					rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
+
+					offset += thisRoll;
+					fileOffset += thisRoll;
+					rollOverInitialBytes -= thisRoll;
+
+					if(rollOverInitialBytes)
+					{
+						goto refresh;
+					}
+				}
+
+				while(offset < bytesInEndings)
+				{
+					// Is current checksum in hash list?
+					uint16_t hash = rolling.GetComponentForHashing();
+					if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s]))
+					{
+						if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks))
+						{
+							BOX_TRACE("Found block match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset);
+							goodnessOfFit[fileOffset] = Sizes[s];
+
+							// Block matched, roll the checksum forward to the next block without doing
+							// any more comparisons, because these are pointless (as any more matches will be ignored when
+							// the recipe is generated) and just take up valuable processor time. Edge cases are
+							// especially nasty, using huge amounts of time and memory.
+							int skip = Sizes[s];
+							if(offset < bytesInEndings && skip > 0)
+							{
+								int spaceLeft = bytesInEndings - offset;
+								int thisRoll = (skip > spaceLeft) ? spaceLeft : skip;
+
+								rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
+
+								offset += thisRoll;
+								fileOffset += thisRoll;
+								skip -= thisRoll;
+							}
+							// Not all the bytes necessary will have been skipped, so get them
+							// skipped after the next block is loaded.
+							rollOverInitialBytes = skip;
+							
+							// End this loop, so the final byte isn't used again
+							break;
+						}
+						else
+						{
+							BOX_TRACE("False alarm match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset);
+						}
+
+						int64_t NumBlocksFound = static_cast<int64_t>(
+							rFoundBlocks.size());
+						int64_t MaxBlocksFound = NumBlocks * 
+							BACKUP_FILE_DIFF_MAX_BLOCK_FIND_MULTIPLE;
+						
+						if(NumBlocksFound > MaxBlocksFound)
+						{
+							abortSearch = true;
+							break;
+						}
+					}
+					
+					// Roll checksum forward
+					rolling.RollForward(beginnings[offset], endings[offset], Sizes[s]);
+				
+					// Increment offsets
+					++offset;
+					++fileOffset;
+				}
+				
+				if(abortSearch) break;
+				
+			refresh:
+				// Finished?
+				if(bytesInEndings != Sizes[s])
+				{
+					// No more data in file -- check the final block
+					// (Do a copy and paste of 5 lines of code instead of introducing a comparison for
+					// each byte of the file)
+					uint16_t hash = rolling.GetComponentForHashing();
+					if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s]))
+					{
+						if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks))
+						{
+							goodnessOfFit[fileOffset] = Sizes[s];
+						}
+					}
+
+					// finish
+					break;
+				}
+				
+				// Switch buffers, reset offset
+				beginnings = endings;
+				endings = (beginnings == pbuffer0)?(pbuffer1):(pbuffer0);	// ie the other buffer
+				offset = 0;
+
+				// And count the blocks which have been done
+				++fileBlockNumber;
+			}
+
+			if(abortSearch) break;
+		}
+		
+		// Free buffers and hash table
+		::free(pbuffer1);
+		pbuffer1 = 0;
+		::free(pbuffer0);
+		pbuffer0 = 0;
+		::free(phashTable);
+		phashTable = 0;
+	}
+	catch(...)
+	{
+		// Cleanup and throw
+		if(pbuffer1 != 0) ::free(pbuffer1);
+		if(pbuffer0 != 0) ::free(pbuffer0);
+		if(phashTable != 0) ::free(phashTable);
+		throw;
+	}
+	
+#ifndef BOX_RELEASE_BUILD
+	if(BackupStoreFile::TraceDetailsOfDiffProcess)
+	{
+		// Trace out the found blocks in debug mode
+		BOX_TRACE("Diff: list of found blocks");
+		BOX_TRACE("======== ======== ======== ========");
+		BOX_TRACE("  Offset   BlkIdx     Size Movement");
+		for(std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin()); i != rFoundBlocks.end(); ++i)
+		{
+			int64_t orgLoc = 0;
+			for(int64_t b = 0; b < i->second; ++b)
+			{
+				orgLoc += pIndex[b].mSize;
+			}
+			BOX_TRACE(std::setw(8) << i->first << " " <<
+				std::setw(8) << i->second << " " <<
+				std::setw(8) << pIndex[i->second].mSize << 
+				" " << 
+				std::setw(8) << (i->first - orgLoc));
+		}
+		BOX_TRACE("======== ======== ======== ========");
+	}
+#endif
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static SetupHashTable(BlocksAvailableEntry *, int64_t, in32_t, BlocksAvailableEntry **)
+//		Purpose: Set up the hash table ready for a scan
+//		Created: 14/1/04
+//
+// --------------------------------------------------------------------------
+static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable)
+{
+	// Set all entries in the hash table to zero
+	::memset(pHashTable, 0, (sizeof(BlocksAvailableEntry *) * (64*1024)));
+
+	// Scan through the blocks, building the hash table
+	for(int64_t b = 0; b < NumBlocks; ++b)
+	{
+		// Only look at the required block size
+		if(pIndex[b].mSize == BlockSize)
+		{
+			// Get the value under which to hash this entry
+			uint16_t hash = RollingChecksum::ExtractHashingComponent(pIndex[b].mWeakChecksum);
+			
+			// Already present in table?
+			if(pHashTable[hash] != 0)
+			{
+				//BOX_TRACE("Another hash entry for " << hash << " found");
+				// Yes -- need to set the pointer in this entry to the current entry to build the linked list
+				pIndex[b].mpNextInHashList = pHashTable[hash];
+			}
+
+			// Put a pointer to this entry in the hash table
+			pHashTable[hash] = pIndex + b;
+		}
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static bool SecondStageMatch(xxx)
+//		Purpose: When a match in the hash table is found, scan for second stage match using strong checksum.
+//		Created: 14/1/04
+//
+// --------------------------------------------------------------------------
+static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings,
+	int Offset, int32_t BlockSize, int64_t FileBlockNumber, BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks)
+{
+	// Check parameters
+	ASSERT(pBeginnings != 0);
+	ASSERT(pEndings != 0);
+	ASSERT(Offset >= 0);
+	ASSERT(BlockSize > 0);
+	ASSERT(pFirstInHashList != 0);
+	ASSERT(pIndex != 0);
+
+#ifndef BOX_RELEASE_BUILD
+	uint16_t DEBUG_Hash = fastSum.GetComponentForHashing();
+#endif
+	uint32_t Checksum = fastSum.GetChecksum();
+
+	// Before we go to the expense of the MD5, make sure it's a darn good match on the checksum we already know.
+	BlocksAvailableEntry *scan = pFirstInHashList;
+	bool found=false;
+	while(scan != 0)
+	{
+		if(scan->mWeakChecksum == Checksum)
+		{
+			found = true;
+			break;
+		}
+		scan = scan->mpNextInHashList;
+	}
+	if(!found)
+	{
+		return false;
+	}
+
+	// Calculate the strong MD5 digest for this block
+	MD5Digest strong;
+	// Add the data from the beginnings
+	strong.Add(pBeginnings + Offset, BlockSize - Offset);
+	// Add any data from the endings
+	if(Offset > 0)
+	{
+		strong.Add(pEndings, Offset);
+	}
+	strong.Finish();
+	
+	// Then go through the entries in the hash list, comparing with the strong digest calculated
+	scan = pFirstInHashList;
+	//BOX_TRACE("second stage match");
+	while(scan != 0)
+	{
+		//BOX_TRACE("scan size " << scan->mSize <<
+		//	", block size " << BlockSize <<
+		//	", hash " << Hash);
+		ASSERT(scan->mSize == BlockSize);
+		ASSERT(RollingChecksum::ExtractHashingComponent(scan->mWeakChecksum) == DEBUG_Hash);
+	
+		// Compare?
+		if(strong.DigestMatches(scan->mStrongChecksum))
+		{
+			//BOX_TRACE("Match!\n");
+			// Found! Add to list of found blocks...
+			int64_t fileOffset = (FileBlockNumber * BlockSize) + Offset;
+			int64_t blockIndex = (scan - pIndex);	// pointer arthmitic is frowned upon. But most efficient way of doing it here -- alternative is to use more memory
+			
+			// We do NOT search for smallest blocks first, as this code originally assumed.
+			// To prevent this from potentially overwriting a better match, the caller must determine
+			// the relative "goodness" of any existing match and this one, and avoid the call if it
+			// could be detrimental.
+			rFoundBlocks[fileOffset] = blockIndex;
+			
+			// No point in searching further, report success
+			return true;
+		}
+	
+		// Next
+		scan = scan->mpNextInHashList;
+	}
+	
+	// Not matched
+	return false;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static GenerateRecipe(BackupStoreFileEncodeStream::Recipe &, BlocksAvailableEntry *, int64_t, std::map<int64_t, int64_t> &)
+//		Purpose: Fills in the recipe from the found block list
+//		Created: 15/1/04
+//
+// --------------------------------------------------------------------------
+static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex,
+		int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile)
+{
+	// NOTE: This function could be a lot more sophisiticated. For example, if
+	// a small block overlaps a big block like this
+	//   ****
+	//      *******************************
+	// then the small block will be used, not the big one. But it'd be better to
+	// just ignore the small block and keep the big one. However, some stats should
+	// be gathered about real world files before writing complex code which might
+	// go wrong.
+
+	// Initialise a blank instruction
+	BackupStoreFileEncodeStream::RecipeInstruction instruction;
+	#define RESET_INSTRUCTION			\
+	instruction.mSpaceBefore = 0;		\
+	instruction.mBlocks = 0;			\
+	instruction.mpStartBlock = 0;
+	RESET_INSTRUCTION
+
+	// First, a special case for when there are no found blocks
+	if(rFoundBlocks.size() == 0)
+	{
+		// No blocks, just a load of space
+		instruction.mSpaceBefore = SizeOfInputFile;
+		rRecipe.push_back(instruction);	
+	
+		#ifndef BOX_RELEASE_BUILD
+		if(BackupStoreFile::TraceDetailsOfDiffProcess)
+		{
+			BOX_TRACE("Diff: Default recipe generated, " << 
+				SizeOfInputFile << " bytes of file");
+		}
+		#endif
+		
+		// Don't do anything
+		return;
+	}
+
+	// Current location
+	int64_t loc = 0;
+
+	// Then iterate through the list, generating the recipe
+	std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin());
+	ASSERT(i != rFoundBlocks.end());	// check logic
+
+	// Counting for debug tracing
+#ifndef BOX_RELEASE_BUILD
+	int64_t debug_NewBytesFound = 0;
+	int64_t debug_OldBlocksUsed = 0;
+#endif
+	
+	for(; i != rFoundBlocks.end(); ++i)
+	{
+		// Remember... map is (position in file) -> (index of block in pIndex)
+		
+		if(i->first < loc)
+		{
+			// This block overlaps the last one
+			continue;
+		}
+		else if(i->first > loc)
+		{
+			// There's a gap between the end of the last thing and this block.
+			// If there's an instruction waiting, push it onto the list
+			if(instruction.mSpaceBefore != 0 || instruction.mpStartBlock != 0)
+			{
+				rRecipe.push_back(instruction);
+			}
+			// Start a new instruction, with the gap ready
+			RESET_INSTRUCTION
+			instruction.mSpaceBefore = i->first - loc;
+			// Move location forward to match
+			loc += instruction.mSpaceBefore;
+#ifndef BOX_RELEASE_BUILD
+			debug_NewBytesFound += instruction.mSpaceBefore;
+#endif
+		}
+		
+		// First, does the current instruction need pushing back, because this block is not
+		// sequential to the last one?
+		if(instruction.mpStartBlock != 0 && (pIndex + i->second) != (instruction.mpStartBlock + instruction.mBlocks))
+		{
+			rRecipe.push_back(instruction);
+			RESET_INSTRUCTION
+		}
+		
+		// Add in this block
+		if(instruction.mpStartBlock == 0)
+		{
+			// This block starts a new instruction
+			instruction.mpStartBlock = pIndex + i->second;
+			instruction.mBlocks = 1;
+		}
+		else
+		{
+			// It continues the previous section of blocks
+			instruction.mBlocks += 1;
+		}
+
+#ifndef BOX_RELEASE_BUILD
+		debug_OldBlocksUsed++;
+#endif
+
+		// Move location forward
+		loc += pIndex[i->second].mSize;
+	}
+	
+	// Push the last instruction generated
+	rRecipe.push_back(instruction);
+	
+	// Is there any space left at the end which needs sending?
+	if(loc != SizeOfInputFile)
+	{
+		RESET_INSTRUCTION
+		instruction.mSpaceBefore = SizeOfInputFile - loc;
+#ifndef BOX_RELEASE_BUILD
+		debug_NewBytesFound += instruction.mSpaceBefore;
+#endif
+		rRecipe.push_back(instruction);		
+	}
+	
+	// dump out the recipe
+#ifndef BOX_RELEASE_BUILD
+	BOX_TRACE("Diff: " << 
+		debug_NewBytesFound << " new bytes found, " <<
+		debug_OldBlocksUsed << " old blocks used");
+	if(BackupStoreFile::TraceDetailsOfDiffProcess)
+	{
+		BOX_TRACE("Diff: Recipe generated (size " << rRecipe.size());
+		BOX_TRACE("======== ========= ========");
+		BOX_TRACE("Space b4 FirstBlk  NumBlks");
+		{
+			for(unsigned int e = 0; e < rRecipe.size(); ++e)
+			{
+				char b[64];
+#ifdef WIN32
+				sprintf(b, "%8I64d", (int64_t)(rRecipe[e].mpStartBlock - pIndex));
+#else
+				sprintf(b, "%8lld", (int64_t)(rRecipe[e].mpStartBlock - pIndex));
+#endif
+				BOX_TRACE(std::setw(8) <<
+					rRecipe[e].mSpaceBefore <<
+					" " <<
+					((rRecipe[e].mpStartBlock == 0)?"       -":b) <<
+					" " << std::setw(8) <<
+					rRecipe[e].mBlocks);
+			}
+		}
+		BOX_TRACE("======== ========= ========");
+	}
+#endif
+}

Modified: box/trunk/modules.txt
===================================================================
--- box/trunk/modules.txt	2011-05-24 13:33:50 UTC (rev 2962)
+++ box/trunk/modules.txt	2011-05-24 15:08:59 UTC (rev 2963)
@@ -29,7 +29,7 @@
 lib/backupclient	lib/backupstore
 
 bin/bbackupobjdump	lib/backupclient lib/backupstore
-bin/bbstored		lib/raidfile	lib/server	lib/backupstore	lib/backupclient
+bin/bbstored		lib/raidfile	lib/server	lib/backupstore
 bin/bbstoreaccounts	lib/raidfile	lib/backupstore
 bin/bbackupd		lib/server	lib/backupclient	qdbm
 bin/bbackupquery	lib/server	lib/backupclient




More information about the Boxbackup-commit mailing list