/*
 *  mbMath.cpp
 *  fp
 *
 *  Created by Robert Delaney on 4/24/09.
 *  Copyright 2009 __Bob Delaney's Science Software__. All rights reserved.
 *
 */


#include "fp.h"
#include "mbConv.h"
#include "newRandom.h"

using namespace std;

// Returns how many of the low blockBits bits of x are 1.
INT32 countOnes(UINT32 x)
{
	INT32		ones, remaining;
	
	ones = 0;
	
	// peel off the lowest bit on each pass
	for(remaining=blockBits;remaining>0;remaining--)
	{
		if(x & 1)
			ones++;
		x = (x>>1);
	}
	
	return ones;
	
}/* countOnes */


// Counts the bit positions 0..maxPos (inclusive) at which x and y hold the same bit.
// maxPos is clamped to the range 0..31 before counting.
INT32 compareBitsUINT32(UINT32 x, UINT32 y, INT32 maxPos)
{
	UINT32		differing;
	INT32		pos, top, matches;
	
	// clamp the highest examined position to a valid bit index
	top = maxPos;
	if(top<0)
		top = 0;
	else if(top>31)
		top = 31;
	
	// a 1 bit in differing marks a position where x and y disagree
	differing = (x ^ y);
	matches = 0;
	
	for(pos=0;pos<=top;pos++)
	{
		if(((differing>>pos) & 1)==0)
			matches++;
	}
	
	return matches;
	
}/* compareBitsUINT32 */


// Returns the bit (0 or 1) of x at position bitIndex.
// An index outside 0..31 yields 0.
INT32 getBit(UINT32 x, INT32 bitIndex)
{
	if(bitIndex>=0 && bitIndex<=31)
	{
		// bring the requested bit down to position 0 and isolate it
		if((x>>bitIndex) & 1)
			return 1;
	}
	
	return 0;
	
}/* getBit */


// Returns x with the bit at bitIndex forced to 0.
// An index outside 0..31 returns x unchanged.
UINT32 clearBit(UINT32 x, INT32 bitIndex)
{
	UINT32		bit;
	
	if(bitIndex>=0 && bitIndex<=31)
	{
		bit = 1;
		bit = (bit<<bitIndex);
		
		// masking with the complement clears the bit whether or not it was set
		x = (x & ~bit);
	}
	
	return x;
	
}/* clearBit */


// Returns x with the bit at bitIndex forced to 1.
// An index outside 0..31 returns x unchanged.
UINT32 setBit(UINT32 x, INT32 bitIndex)
{
	UINT32		bit;
	
	if(bitIndex>=0 && bitIndex<=31)
	{
		bit = 1;
		bit = (bit<<bitIndex);
		x = (x | bit);
	}
	
	return x;
	
}/* setBit */


// Returns the bit (0 or 1) of the multi-block value x at position bitIndex,
// where position 0 is the lowest bit of block 0.
// Zero x, or an index outside 0..NumBits(x)-1, yields 0.
INT32 mbGetBit(const mb& x, INT32 bitIndex)
{
	INT32		whichBlock, bitInBlock;
	
	if(x.n==0)
		return 0;
	
	if(bitIndex<0 || bitIndex>=NumBits(x))
		return 0;
	
	// split the flat bit position into a block number and an offset inside that block
	whichBlock = bitIndex/blockBits;
	bitInBlock = bitIndex%blockBits;
	
	return getBit(x.b[whichBlock], bitInBlock);
	
}/* mbGetBit */


// Counts the bit positions from lowBitIndex to highBitIndex (inclusive) at which
// x and y hold the same bit. Positions beyond the top of either value read as 0
// (mbGetBit supplies the zero extension). A negative lowBitIndex returns 0.
INT32 mbCompareBits(const mb& x, const mb& y, INT32 lowBitIndex, INT32 highBitIndex)
{
	INT32		pos, matches;
	
	matches = 0;
	
	if(lowBitIndex>=0)
	{
		for(pos=lowBitIndex;pos<=highBitIndex;pos++)
		{
			if(mbGetBit(x, pos)==mbGetBit(y, pos))
				matches++;
		}
	}
	
	return matches;
	
}/* mbCompareBits */


// Returns a copy of x with the bit at bitIndex set to 1.
// A negative bitIndex returns an unchanged copy of x.
// For x == 0 the result is 2^bitIndex (the original returned 1 for every bitIndex).
mb mbSetBit(const mb& x, INT32 bitIndex)
{
	mb		z, mask;
	
	if(bitIndex<0)
	{
		equate(z, x);
		return z;
	}
	
	// mask = 2^bitIndex; a zero shift is skipped because mask already holds 1
	mask = 1;
	if(bitIndex>0)
		mbShiftLeft(mask, mask, bitIndex);
	
	if(x.n==0)
	{
		// nothing to merge with: the answer is the mask itself
		equate(z, mask);
		return z;
	}
	
	z = (x | mask);
	
	return z;
	
}/* mbSetBit */


// Returns a copy of x with the bit at bitIndex cleared to 0.
// If bitIndex is outside 0..NumBits(x)-1 an unchanged copy of x is returned.
mb mbClearBit(const mb& x, INT32 bitIndex)
{
	mb		result, bit;
	
	if(bitIndex<0 || bitIndex>=NumBits(x))
	{
		equate(result, x);
		return result;
	}
	
	if(!x.n)
	{
		result = 0;
		return result;
	}
	
	// bit = 2^bitIndex
	bit = 1;
	mbShiftLeft(bit, bit, bitIndex);
	
	// only toggle the bit when it is currently set; otherwise x is already the answer
	if((x & bit).n!=0)
		result = (x ^ bit);
	else
		equate(result, x);
	
	return result;

}/* mbClearBit */


// Returns a copy of x with the bit at bitIndex flipped (exclusive or with 2^bitIndex).
// A negative bitIndex returns an unchanged copy of x.
// For x == 0 the result is 2^bitIndex (the original returned 1 for every bitIndex).
mb mbFlipBit(const mb& x, INT32 bitIndex)
{
	mb		z, mask;
	
	if(bitIndex<0)
	{
		equate(z, x);
		return z;
	}
	
	// mask = 2^bitIndex; a zero shift is skipped because mask already holds 1
	mask = 1;
	if(bitIndex>0)
		mbShiftLeft(mask, mask, bitIndex);
	
	if(x.n==0)
	{
		// flipping a bit of zero just turns that bit on
		equate(z, mask);
		return z;
	}
	
	z = (x ^ mask);
	
	return z;
	
}/* mbFlipBit */


// Returns the total number of 1 bits over all abs(x.n) blocks of x (0 for zero x).
INT32 mbCountOnes(const mb& x)
{
	INT32		k, numBlocks, total;
	
	total = 0;
	numBlocks = abs(x.n);
	
	// a zero value has no blocks, so the loop is skipped and 0 is returned
	for(k=0;k<numBlocks;k++)
		total += countOnes(x.b[k]);
	
	return total;
	
}/* mbCountOnes */


// Returns true when x is even (zero counts as even).
// Only the lowest bit of block 0 decides parity, so it is tested directly
// instead of building a temporary mb.
bool isItEven(const mb& x)
{
	if(!x.n)
		return true;
	
	return ((x.b[0] & 1)==0);
	
}/* isItEven */


// Tests whether x is a perfect square.
// Returns true and puts the square root in z when it is; otherwise returns false with z = 0.
// Zero is a perfect square (root 0). A negative x is never a perfect square
// (the original reported true for negative input).
bool isItSquare(mb& z, const mb& x)
{
	INT32		a, b, c, d;
	
	a = 7;
	b = 31;
	c = 127;
	d = 191;
	
	if(x.n<0)
	{
		z = 0;
		return false;
	}
	
	if(x.n==0)
	{
		z = 0;
		return true;
	}
	
	// cheap filter on the low bits: only values passing one of these tests can be
	// squares, so the expensive square root is taken only for those candidates
	if(((x & a) == 1) || ((x & b) == 4) || ((x & c) == 16) || ((x & d) == 0))
	{
		sqrt(z, x);
		if(x==z*z)
			return true;
	}
	
	z = 0;
	return false;
	
}/* isItSquare */

// z = |x|. Works on a private copy first, so z may be the same object as x.
void abs(mb& z, const mb& x)
{
    mb  magnitude;
    
    magnitude = x;
    
    // the sign lives in the sign of n; the blocks themselves hold the magnitude
    magnitude.n = (magnitude.n<0) ? -magnitude.n : magnitude.n;
    
    z = magnitude;
    
}/* abs */


// Returns |x| by value; convenience form of abs(z, x).
mb abs(const mb& x)
{
	mb		result;
	
	abs(result, x);
	return result;
	
}/* abs */


// Re-sizes the block storage of x to match x.n; the old block contents are discarded.
// If x.n is zero any existing storage is released and x becomes an empty value.
// Otherwise the old storage is freed and abs(x.n) fresh (uninitialized) blocks are
// allocated; the program exits if the allocation fails.
// This should only be used on a local static variable in a function, never on a returned mb&
void updateWithCare(mb& x)
{
	if(x.n==0)
	{
		if(x.b!=NULL)
		{
			free(x.b);
			x.b = NULL;
			x.n = x.nn = 0;
		}
		return;
	}
	
	// free(NULL) is harmless, so no guard is needed here
	free(x.b);
	x.b = NULL;
	
	x.nn = abs(x.n);
	x.b = (UINT32*)malloc(x.nn*sizeof(UINT32));
	if(x.b==NULL)
		exit(1);
	
}/* updateWithCare */


// Re-sizes the block storage of x to match x.n; the old block contents are discarded.
// Existing storage is released only when both x.nn and x.b are non-zero.
// With x.n == 0 the value ends up with no storage; otherwise abs(x.n) fresh
// (uninitialized) blocks are allocated, and the program exits if that fails.
void update(mb& x)
{
	const bool	hasStorage = (x.nn && x.b);
	
	if(x.n==0)
	{
		if(hasStorage)
		{
			free(x.b);
			x.b = NULL;
			x.nn = 0;
		}
		return;
	}
	
	if(hasStorage)
		free(x.b);
	
	x.nn = abs(x.n);
	x.b = (UINT32*)malloc(x.nn*sizeof(UINT32));
	if(x.b==NULL)
		exit(1);

}/* update */


// Drops leading (most significant) zero blocks from z by reducing abs(z.n).
// The sign of z is preserved; if every block is zero, z becomes the zero value
// and its storage is released through update().
void mbNormalize(mb& z)
{
	bool		wasNegative;
	INT32		used;
	
	if(z.n==0)
		return;
	
	wasNegative = (z.n<0);
	used = wasNegative ? -z.n : z.n;
	
	// walk down from the top block until a non-zero block is found
	while(used>0 && z.b[used-1]==0)
		used--;
	
	z.n = used;
	
	if(used==0)
		update(z);
	else if(wasNegative)
		z.n = -used;
	
}/* mbNormalize */


// Shifts x left until the most significant bit of its high block is 1,
// and returns the number of bit positions shifted.
// x is assumed normalized; a wholly zero high block counts as blockBits positions.
INT32 alignLeft(mb& x)
{
	const UINT32		topBit = ((UINT32)1<<31); // 0x80000000
	UINT32				block;
	INT32				k, shift;
	
	shift = 0;
	
	// scan from the highest block downward
	for(k=abs(x.n)-1;k>=0;k--)
	{
		block = x.b[k];
		
		if(block==0)
		{
			// whole block is empty: count all of it and look at the next lower block
			shift = shift + blockBits;
			continue;
		}
		
		// count the leading zero bits of the first non-zero block, then stop
		while((block & topBit)==0)
		{
			shift++;
			block = (block<<1);
		}
		break;
	}
	
	mbShiftLeft(x, x, shift);
	
	return shift;
	
}/* alignLeft */


// Returns the smaller of x and y.
static INT32 Min(INT32 x, INT32 y)
{
	return (x>y) ? y : x;
	
}/* Min */


// Returns the larger of x and y.
static INT32 Max(INT32 x, INT32 y)
{
	return (x>y) ? x : y;
	
}/* Max */


// compares abs(x) to abs(y); returns:
// +1 if abs(x)>abs(y)
// 0  if abs(x)=abs(y)
// -1 if abs(x)<abs(y)
// Leading zero blocks are ignored by measuring each operand's effective length
// in place, so no temporary copies of x or y are made.
INT32 mbCompareAbs(const mb& x, const mb& y)
{
	INT32				i, xLen, yLen;
	
	if(!x.n && !y.n)
		return 0;
	
	if(!y.n)
		return 1;
	
	if(!x.n)
		return -1;
	
	// effective block counts once leading zero blocks are skipped
	xLen = abs(x.n);
	while(xLen>0 && x.b[xLen-1]==0)
		xLen--;
	
	yLen = abs(y.n);
	while(yLen>0 && y.b[yLen-1]==0)
		yLen--;
	
	// more significant blocks means a larger magnitude
	if(xLen>yLen)
		return 1;
	if(xLen<yLen)
		return -1;
	
	// same length: the highest differing block decides
	for(i=xLen-1;i>=0;i--)
	{
		if(x.b[i]>y.b[i])
			return 1;
		if(x.b[i]<y.b[i])
			return -1;
	}
	
	return 0;
	
}/* mbCompareAbs */


// z = x*y where y is a single unsigned block; the result takes the sign of x.
// z is zero when either factor is zero.
void mulByScalarAbs(mb& z, const mb& x, UINT32 y)
{
	static mb			product;
	INT32				k, numBlocks, sign;
	UINT64				term, carry;
	
	if(x.n==0 || y==0)
	{
		z = 0;
		return;
	}
	
	// capture sign and size before z is written, since z may be the same object as x
	sign = (x.n>0) ? 1 : -1;
	numBlocks = abs(x.n);
	
	// one extra block holds the final carry
	product.n = numBlocks + 1;
	updateWithCare(product);
	
	carry = 0;
	for(k=0;k<numBlocks;k++)
	{
		term = x.b[k];
		term = y*term + carry;
		product.b[k] = term & 0xFFFFFFFF;
		carry = (term>>32);
	}
	product.b[product.n-1] = carry;
	
	// strip leading zero blocks
	while(product.n>0 && product.b[product.n-1]==0)
		product.n--;
	
	if(product.n==0)
		update(product);
	
	equate(z, product);
	
	z.n = sign*abs(z.n);
	
}/* mulByScalarAbs */



// z = x * 2^abs(numBits), keeping the sign of x.
// The shifted value is assembled in a private static work area and then copied
// into z, so z may be the same object as x.
// Whole-block moves handle the multiple-of-blockBits part of the shift; any
// remaining bits are then carried across blocks from low to high.
void mbShiftLeft(mb& z, const mb& x, INT32 numBits)
{
	static mb			shifted;
	INT32				k, sign, xBlocks, wholeBlocks, partBits;
	UINT32				carryIn, carryOut; // bits moving from one block into the next
	
	if(x.n==0)
	{
		z = 0;
		return;
	}
	
	if(numBits==0)
	{
		equate(z, x);
		return;
	}
	
	sign = (x.n>0) ? 1 : -1;
	xBlocks = abs(x.n);
	
	numBits = abs(numBits);
	wholeBlocks = numBits/blockBits;
	partBits = numBits%blockBits;
	
	// a spare high block is needed only when bits spill past a block boundary
	shifted.n = wholeBlocks + xBlocks;
	if(partBits)
		shifted.n++;
	updateWithCare(shifted);
	
	// low blocks vacated by the shift are zero
	for(k=0;k<wholeBlocks;k++)
		shifted.b[k] = 0;
	
	// the blocks of x move up by wholeBlocks positions
	for(k=0;k<xBlocks;k++)
		shifted.b[k+wholeBlocks] = x.b[k];
	
	if(partBits)
	{
		// the spare high block starts empty and receives the last carry
		shifted.b[shifted.n-1] = 0;
		
		carryIn = 0;
		for(k=wholeBlocks;k<=wholeBlocks+xBlocks;k++)
		{
			carryOut = (shifted.b[k]>>(blockBits-partBits)); // high partBits bits, moved to the low end
			shifted.b[k] = carryIn + (shifted.b[k]<<partBits);
			carryIn = carryOut;
		}
		
		mbNormalize(shifted); // the uppermost block might be zero
	}
	
	equate(z, shifted);
	
	z.n = sign*abs(z.n);
		
}/* mbShiftLeft */


// z = x / 2^abs(numBits) on the magnitude (bits shifted out are dropped), keeping the sign of x.
// The result is assembled in a private static work area and then copied into z,
// so z may be the same object as x.
// x is assumed normalized
// fpNormalize uses this mbShiftRight
void mbShiftRight(mb& z, const mb& x, INT32 numBits)
{
	static mb			shifted;
	INT32				k, sign, wholeBlocks, partBits;
	UINT32				carryIn, carryOut; // bits moving from one block down into the next
	
	if(x.n==0)
	{
		z = 0;
		return;
	}
	
	if(numBits==0)
	{
		equate(z, x);
		return;
	}
	
	sign = (x.n>0) ? 1 : -1;
	
	numBits = abs(numBits);
	wholeBlocks = numBits/blockBits;
	partBits = numBits%blockBits;
	
	// shifting by at least as many whole blocks as x has leaves nothing
	shifted.n = abs(x.n) - wholeBlocks;
	if(shifted.n<=0)
	{
		z = 0;
		return;
	}
	
	updateWithCare(shifted);
	
	// drop the lowest wholeBlocks blocks of x
	for(k=0;k<shifted.n;k++)
		shifted.b[k] = x.b[k+wholeBlocks];
		
	if(partBits)
	{
		carryIn = 0; // zeroes enter at the top
		for(k=shifted.n-1;k>=0;k--)
		{
			carryOut = (shifted.b[k]<<(blockBits-partBits)); // low partBits bits, moved to the high end
			shifted.b[k] = carryIn + (shifted.b[k]>>partBits);
			carryIn = carryOut;
		}
	}
	
	// strip leading zero blocks
	while(shifted.n>0 && shifted.b[shifted.n-1]==0)
		shifted.n--;
	
	if(shifted.n==0)
		update(shifted);
		
	equate(z, shifted);
	
	z.n = sign*abs(z.n);
	
}/* mbShiftRight */


// z = abs(x) + abs(y); the result is never negative.
// The sum is built in a private static work area and then copied into z.
static void mbAddAbs(mb& z, const mb& x, const mb& y)
{
	static mb			sum;
	INT32				k, xLen, yLen, shortLen, longLen;
	const UINT32*		longer;
	UINT64				acc; // running column sum; its high half is the carry
	
	if(x.n==0 && y.n==0)
	{
		z = 0;
		return;
	}
	
	if(x.n==0)
	{
		equate(z, y);
		z.n = abs(z.n);
		return;
	}
	
	if(y.n==0)
	{
		equate(z, x);
		z.n = abs(z.n);
		return;
	}
	
	xLen = abs(x.n);
	yLen = abs(y.n);
	
	if(xLen>=yLen)
	{
		longLen = xLen;
		shortLen = yLen;
		longer = x.b;
	}
	else
	{
		longLen = yLen;
		shortLen = xLen;
		longer = y.b;
	}
	
	// one block more than the longer operand in case of a final carry
	sum.n = longLen + 1;
	updateWithCare(sum);
	
	// blocks present in both operands
	acc = 0;
	for(k=0;k<shortLen;k++)
	{
		acc = acc + x.b[k] + y.b[k];
		sum.b[k] = (UINT32)(acc & 0xFFFFFFFF);
		acc = (acc>>32);
	}
	
	// remaining blocks of the longer operand, still propagating the carry
	for(k=shortLen;k<longLen;k++)
	{
		acc = acc + longer[k];
		sum.b[k] = (UINT32)(acc & 0xFFFFFFFF);
		acc = (acc>>32);
	}
	
	// keep the extra block only when a carry actually reached it
	if(acc)
		sum.b[sum.n-1] = (UINT32)acc;
	else
		sum.n--;
	
	equate(z, sum);
	
}/* mbAddAbs */


// z = abs(x) - abs(y), computed block by block with borrow propagation.
// Returns the final borrow bit of the block loop. The shortcut cases return fixed
// values instead: 0 when both are zero, when x == y, or when x is zero (z = abs(y));
// 1 when y is zero (z = abs(x)).
// Callers in this file pass the larger magnitude as x.
static INT32 mbSubAbs(mb& z, const mb& x, const mb& y)
{
	static mb			diff;
	const UINT64		bit32 = ((UINT64)1<<32); // 0x100000000
	const UINT64		low32 = bit32 - 1;        // 0xFFFFFFFF
	INT32				k, xLen, yLen, shared;
	UINT64				acc, borrow;
	
	xLen = abs(x.n);
	yLen = abs(y.n);
	
	diff.n = (xLen>yLen) ? xLen : yLen;
	shared = (xLen>yLen) ? yLen : xLen;
	
	updateWithCare(diff);
	
	if(x.n==0 && y.n==0)
	{
		z = 0;
		return 0;
	}
    
	if(x==y)
	{
		z = 0;
		return 0;
	}
	
	if(x.n==0)
	{
		equate(z, y);
		z.n = abs(z.n);
		return 0;
	}
	
	if(y.n==0)
	{
		equate(z, x);
		z.n = abs(z.n);
		return 1;
	}
	
	borrow = 0;
	
	// blocks present in both operands; a wrap of the 64-bit difference shows up in bit 32
	for(k=0;k<shared;k++)
	{
		acc = x.b[k];
		acc = acc - y.b[k] - borrow;
		diff.b[k] = acc & low32;
		borrow = ((acc & bit32)>>32);
	}
	
	// x has more blocks: only the borrow remains to be subtracted
	if(xLen>yLen)
	{
		for(k=shared;k<xLen;k++)
		{
			acc = x.b[k];
			acc = acc - borrow;
			diff.b[k] = acc & low32;
			borrow = ((acc & bit32)>>32);
		}
	}
	
	// y has more blocks: subtract them from an implied zero
	if(yLen>xLen)
	{
		for(k=shared;k<yLen;k++)
		{
			acc = 0;
			acc = acc - y.b[k] - borrow;
			diff.b[k] = acc & low32;
			borrow = ((acc & bit32)>>32);
		}
	}
		
	// get rid of leading zeroes
	while(diff.n>0 && diff.b[diff.n-1]==0)
		diff.n--;
	
	if(diff.n==0)
		update(diff);
		
	equate(z, diff);
	
	return (INT32)borrow;
	
}/* mbSubAbs */



// to be used only by div!
// Estimates one quotient block: the three-block value in b3 divided by the
// two-block value in b2, evaluated in double precision.
// An estimate of 2^32 or more is clamped to 0xFFFFFFFF.
static UINT32 ThreeByTwo(mb& b3, mb& b2)
{
	static const double	radix = 4294967296.; // 2^32, the weight of one block
	double				numer, denom, estimate;
	
	// numer = b3 as a double, accumulated from the high block down
	numer = b3.b[2];
	numer = radix*numer + b3.b[1];
	numer = radix*numer + b3.b[0];
	
	// denom = b2 as a double
	denom = b2.b[1];
	denom = radix*denom + b2.b[0];
	
	// if the quotient is 0x100000000 and a UINT32 is set from it, Cocoa gives 0xFFFFFFFF while minGW gives 0,
	// which is why the clamp below is explicit
	
	// without .01, rarely the estimate is one less than it should be, which is a disaster!
	// was .5 which often caused the estimate to be one more than it should be; div handled that but was slower
	estimate = numer/denom + .01;
	
	if(estimate>=radix)
		return 0xFFFFFFFF;
	
	return (UINT32)estimate;
	
}/* ThreeByTwo */


/*
	signed comparison of x and y; returns:
	1 if x>y
	0 if x=y
	-1 if x<y
*/
INT32 compare(const mb& x, const mb& y)
{
	const bool	xNegative = (x.n<0);
	const bool	yNegative = (y.n<0);
	INT32		magnitudeOrder;
	
	magnitudeOrder = mbCompareAbs(x, y);
	
	if(magnitudeOrder==0)
	{
		// equal magnitudes: they differ only if the signs differ
		if(xNegative==yNegative)
			return 0;
		
		return xNegative ? -1 : 1;
	}
	
	if(magnitudeOrder==1)
	{
		// x has the larger magnitude, so the sign of x decides
		return xNegative ? -1 : 1;
	}
	
	// y has the larger magnitude, so the sign of y decides
	return yNegative ? 1 : -1;
	
}/* Compare */


// Returns the two's complement of abs(x) over a width of numBlocks blocks,
// computed as 2^(blockBits*numBlocks) - abs(x).
// numBlocks is raised to abs(x.n) if it is smaller; zero x gives zero.
// used by And, Or, and Xor when x is negative
mb twosComplement(const mb& x, INT32 numBlocks)
{
	mb		result;
	INT32	width;
	
	if(x.n==0)
	{
		result = 0;
		return result;
	}
	
	// never use fewer blocks than x itself occupies
	width = (numBlocks<abs(x.n)) ? abs(x.n) : numBlocks;
	
	// result = 2^(blockBits*width)
	result = 1;
	mbShiftLeft(result, result, blockBits*width);
	
	result = result - abs(x);
	
	return result;
	
}/* twosComplement */


// Two's complement of abs(x) over exactly the number of blocks x occupies.
mb twosComplement(const mb& x)
{
	mb		result;
	
	result = twosComplement(x, abs(x.n));
	return result;
	
}/* twosComplement */


// Bit by bit AND of x with y; the result is always >= 0.
// A negative operand is first replaced by its two's complement taken over
// max(abs(x.n), abs(y.n)) blocks. The result has at most as many blocks as the
// shorter operand and is normalized before being returned.
// The program exits if the result storage cannot be allocated.
mb mbAnd(const mb& x, const mb& y)
{
	INT32		i, m, numBlocks;
	mb			xx, yy, z;
	
	if(x.n==0 || y.n==0)
	{	
		z = 0;
		return z;
	}
	
	numBlocks = Max(abs(x.n), abs(y.n));
	
	if(x.n<0)
		xx = twosComplement(x, numBlocks);
	else
		xx = x;
	
	if(y.n<0)
		yy = twosComplement(y, numBlocks);
	else
		yy = y;
	
	// blocks above the shorter operand AND with zero, so they are not needed
	m = Min(abs(xx.n), abs(yy.n));
	z.n = z.nn = m;
	z.b = (UINT32*)malloc(m*sizeof(UINT32));
	if(!z.b)
		exit(1);
	
	for(i=0;i<m;i++)
		z.b[i] = (xx.b[i] & yy.b[i]);
	
	mbNormalize(z);
	
	return z;
	
}/* mbAnd */

// Bit by bit AND of x with the integer y.
// A negative y is converted to an mb and handled by mbAnd(mb, mb).
// For y > 0 only block 0 of x can contribute; a negative x is first replaced
// by its two's complement over its own block count.
// The program exits if the result storage cannot be allocated.
mb mbAnd(const mb& x, INT32 y)
{
	INT32		m;
	UINT32		yy;
	mb			xx, z;
	
	if(x.n==0 || y==0)
	{
		z = 0;
		return z;
	}
	
	if(y<0)
	{
		z = y;
		return mbAnd(x, z);
	}
	
	yy = y;
	
	if(x.n<0)
		xx = twosComplement(x, abs(x.n));
	else
		xx = x;
	
	// y fits in one block, so the result does too
	m = 1;
	z.n = z.nn = m;
	z.b = (UINT32*)malloc(m*sizeof(UINT32));
	if(!z.b)
		exit(1);
	
	z.b[0] = (xx.b[0] & yy);
	
	mbNormalize(z);
	
	return z;
	
}/* mbAnd */


// Bit by bit OR of x with y.
// A negative operand is first replaced by its two's complement taken over
// max(abs(x.n), abs(y.n)) blocks. If one operand is zero the (converted) other
// operand is returned. The result has as many blocks as the longer operand.
// The program exits if the result storage cannot be allocated.
mb mbOr(const mb& x, const mb& y)
{
	mb		xx, yy, z;
	INT32	i, mMin, mMax, numBlocks;
	
	if(x.n==0 && y.n==0)
	{
		z = 0;
		return z;
	}
	
	numBlocks = Max(abs(x.n), abs(y.n));
	
	if(x.n<0)
		xx = twosComplement(x, numBlocks);
	else
		xx = x;
	
	if(y.n<0)
		yy = twosComplement(y, numBlocks);
	else
		yy = y;
	
	if(x.n==0)
	{
		z = yy;
		return z;
	}
	
	if(y.n==0)
	{
		z = xx;
		return z;
	}
	
	mMax = Max(abs(xx.n), abs(yy.n));
	z.n = z.nn = mMax;
	z.b = (UINT32*)malloc(mMax*sizeof(UINT32));
	if(!z.b)
		exit(1);
	
	mMin = Min(abs(xx.n), abs(yy.n));
	
	// blocks present in both operands
	for(i=0;i<mMin;i++)
		z.b[i] = (xx.b[i] | yy.b[i]);
	
	// the remaining blocks of the longer operand OR with zero, i.e. are copied
	if(abs(xx.n)>abs(yy.n))
	{
		for(i=mMin;i<mMax;i++)
			z.b[i] = xx.b[i];
	}
	else
	{
		for(i=mMin;i<mMax;i++)
			z.b[i] = yy.b[i];
	}

	return z;
	
}/* mbOr */


// Bit by bit OR of x with the integer y.
// A negative x is first replaced by its two's complement over its own block count.
// y == 0 returns that (converted) x; a negative y is converted to an mb and
// handled by mbOr(mb, mb). For y > 0 only block 0 is changed.
// The program exits if the result storage cannot be allocated.
mb mbOr(const mb& x, INT32 y)
{
	mb		xx, z;
	INT32	i, m;
	UINT32	yy;
	
	if(x.n==0 && y==0)
	{
		z = 0;
		return z;
	}
	
	if(x.n<0)
		xx = twosComplement(x, abs(x.n));
	else
		xx = x;
	
	if(y==0)
	{
		z = xx;
		return z;
	}
	
	if(y<0)
	{
		z = y;
		return mbOr(x, z);
	}
	
	yy = y;
	
	if(x.n==0)
	{
		// the result is just y in a single block
		m = 1;
		z.n = z.nn = m;
		z.b = (UINT32*)malloc(m*sizeof(UINT32));
		if(!z.b)
			exit(1);
		z.b[0] = yy;
		return z;
	}
	
	m = abs(xx.n);
	z.n = z.nn = m;
	z.b = (UINT32*)malloc(m*sizeof(UINT32));
	if(!z.b)
		exit(1);
	
	// y only reaches block 0; higher blocks are copied unchanged
	z.b[0] = (xx.b[0] | yy);
	for(i=1;i<m;i++)
		z.b[i] = xx.b[i];
	
	return z;
	
}/* mbOr */



// Bit by bit exclusive OR of x with y.
// A negative operand is first replaced by its two's complement taken over
// max(abs(x.n), abs(y.n)) blocks. If one operand is zero the (converted) other
// operand is returned. The result is normalized before being returned.
// The program exits if the result storage cannot be allocated.
mb mbXor(const mb& x, const mb& y)
{
	mb		xx, yy, z;
	INT32	i, mMin, mMax, numBlocks;
	
	if(x.n==0 && y.n==0)
	{
		z = 0;
		return z;
	}
	
	numBlocks = Max(abs(x.n), abs(y.n));
	
	if(x.n<0)
		xx = twosComplement(x, numBlocks);
	else
		xx = x;
	
	if(y.n<0)
		yy = twosComplement(y, numBlocks);
	else
		yy = y;
	
	if(x.n==0)
	{
		z = yy;
		return z;
	}
	
	if(y.n==0)
	{
		z = xx;
		return z;
	}
	
	mMax = Max(abs(xx.n), abs(yy.n));
	z.n = z.nn = mMax;
	z.b = (UINT32*)malloc(mMax*sizeof(UINT32));
	if(!z.b)
		exit(1);
	
	mMin = Min(abs(xx.n), abs(yy.n));
	
	// blocks present in both operands
	for(i=0;i<mMin;i++)
		z.b[i] = (xx.b[i] ^ yy.b[i]);
	
	// the remaining blocks of the longer operand XOR with zero, i.e. are copied
	if(abs(xx.n)>abs(yy.n))
	{
		for(i=mMin;i<mMax;i++)
			z.b[i] = xx.b[i];
	}
	else
	{
		for(i=mMin;i<mMax;i++)
			z.b[i] = yy.b[i];
	}
	
	// equal high blocks cancel, so leading zero blocks are possible
	mbNormalize(z);
	
	return z;
	
}/* mbXor */


// Bit by bit exclusive OR of x with the integer y.
// A negative x is first replaced by its two's complement over its own block count.
// y == 0 returns that (converted) x; a negative y is converted to an mb and
// handled by mbXor(mb, mb). For y > 0 only block 0 is changed.
// The program exits if the result storage cannot be allocated.
mb mbXor(const mb& x, INT32 y)
{
	mb		xx, z;
	INT32	i, m;
	UINT32	yy;
	
	if(x.n==0 && y==0)
	{
		z = 0;
		return z;
	}
	
	if(x.n<0)
		xx = twosComplement(x, abs(x.n));
	else
		xx = x;
	
	if(y==0)
	{
		z = xx;
		return z;
	}
	
	if(y<0)
	{
		z = y;
		return mbXor(x, z);
	}
	
	yy = y;
	
	if(x.n==0)
	{
		// the result is just y in a single block
		m = 1;
		z.n = z.nn = m;
		z.b = (UINT32*)malloc(m*sizeof(UINT32));
		if(!z.b)
			exit(1);
		z.b[0] = yy;
		return z;
	}
	
	m = abs(xx.n);
	z.n = z.nn = m;
	z.b = (UINT32*)malloc(m*sizeof(UINT32));
	if(!z.b)
		exit(1);
	
	// y only reaches block 0; higher blocks are copied unchanged
	z.b[0] = (xx.b[0] ^ yy);
	for(i=1;i<m;i++)
		z.b[i] = xx.b[i];
	
	// a single-block x equal to y cancels to zero
	mbNormalize(z);
	
	return z;
	
}/* mbXor */


// z = x + y (signed).
// Same signs: the magnitudes are added and the common sign applied.
// Different signs: the smaller magnitude is subtracted from the larger and the
// result takes the sign of the operand with the larger magnitude.
void add(mb& z, const mb& x, const mb& y)
{
	static mb			total;
	INT32				xSign, ySign;
	
	if(x.n==0 && y.n==0)
	{
		z = 0;
		return;
	}
	
	if(x.n==0)
	{
		equate(z, y);
		return;
	}
	
	if(y.n==0)
	{
		equate(z, x);
		return;
	}
	
	xSign = (x.n<0) ? -1 : 1;
	ySign = (y.n<0) ? -1 : 1;
	
	if(xSign==ySign)
	{
		mbAddAbs(total, x, y);
		total.n = xSign*abs(total.n);
	}
	else
	{
		// different signs: we need to know which is bigger as abs
		switch(mbCompareAbs(x, y))
		{
			case 0:
				total = 0;
				break;
			
			case 1:
				mbSubAbs(total, x, y);
				total.n = xSign*abs(total.n);
				break;
			
			case -1:
				mbSubAbs(total, y, x);
				total.n = ySign*abs(total.n);
				break;
		}
	}
	
	equate(z, total);
	mbNormalize(z);

}/* add */


// z = x + y for an integer y: y is converted to an mb, then the mb form does the work.
void add(mb& z, const mb& x, INT32 y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	add(z, x, yAsMb);
	
}/* add */


// z = x + y for an integer x: x is converted to an mb, then the mb form does the work.
void add(mb& z, INT32 x, const mb& y)
{
	static mb		xAsMb;
	
	equateWithCare(xAsMb, x);
	add(z, xAsMb, y);
	
}/* add */


// z = x + y for a double y: y is converted to an mb, then the mb form does the work.
void add(mb& z, const mb& x, double y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	add(z, x, yAsMb);
	
}/* add */


// z = x + y for a double x: x is converted to an mb, then the mb form does the work.
void add(mb& z, double x, const mb& y)
{
	static mb		xAsMb;
	
	equateWithCare(xAsMb, x);
	add(z, xAsMb, y);
	
}/* add */


// z = x - y (signed).
// Different signs: the magnitudes are added and the sign of x applied.
// Same signs: the smaller magnitude is subtracted from the larger; the result has
// the sign of x when abs(x) is larger and the opposite sign when abs(y) is larger.
void sub(mb& z, const mb& x, const mb& y)
{
	static mb			difference;
	INT32				xSign, ySign;
	
	if(x.n==0 && y.n==0)
	{
		z = 0;
		return;
	}
	
	if(x.n==0)
	{
		// 0 - y is y with its sign reversed
		equate(z, y);
		z.n = -z.n;
		return;
	}
	
	if(y.n==0)
	{
		equate(z, x);
		return;
	}
	
	xSign = (x.n<0) ? -1 : 1;
	ySign = (y.n<0) ? -1 : 1;
	
	if(xSign!=ySign)
	{
		mbAddAbs(difference, x, y);
		difference.n = xSign*abs(difference.n);
	}
	else
	{
		// same signs: we need to know which is bigger as abs
		switch(mbCompareAbs(x, y))
		{
			case 0:
				difference = 0;
				break;
			
			case 1:
				mbSubAbs(difference, x, y);
				difference.n = xSign*abs(difference.n);
				break;
			
			case -1:
				mbSubAbs(difference, y, x);
				difference.n = -ySign*abs(difference.n);
				break;
		}
	}
	
	equate(z, difference);
	mbNormalize(z);
	
}/* sub */


// z = x - y for an integer y: y is converted to an mb, then the mb form does the work.
void sub(mb& z, const mb& x, INT32 y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	sub(z, x, yAsMb);
	
}/* sub */


// z = x - y for an integer x: x is converted to an mb, then the mb form does the work.
void sub(mb& z, INT32 x, const mb& y)
{
	static mb		xAsMb;
	
	equateWithCare(xAsMb, x);
	sub(z, xAsMb, y);
	
}/* sub */


// z = x - y for a double y: y is converted to an mb, then the mb form does the work.
void sub(mb& z, const mb& x, double y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	sub(z, x, yAsMb);
	
}/* sub */


// z = x - y for a double x: x is converted to an mb, then the mb form does the work.
void sub(mb& z, double x, const mb& y)
{
	static mb		xAsMb;
	
	equateWithCare(xAsMb, x);
	sub(z, xAsMb, y);
	
}/* sub */


// z = x*y (signed), by schoolbook multiplication on 32-bit blocks.
// The product is accumulated in a private static work area and copied into z
// only at the end, so z may be the same object as x and/or y; the operands are
// therefore read in place rather than being copied first.
// The shorter operand drives the outer loop.
void mul(mb& z, const mb& x, const mb& y)
{
	static mb			sz;
	INT32				sign;
	INT32				i, iOuter, iInner, outerLen, innerLen;
	const UINT32		*outer, *inner;
	UINT64				cOuter, sLL, carry;
	
	if(x.n==0 || y.n==0)
	{
		z = 0;
		return;
	}
	
	// the product is negative exactly when the operand signs differ
	sign = ((x.n<0)==(y.n<0)) ? 1 : -1;
	
	if(abs(x.n)>=abs(y.n))
	{
		// y is multiplier
		inner = x.b;
		innerLen = abs(x.n);
		outer = y.b;
		outerLen = abs(y.n);
	}
	else
	{
		// x is multiplier
		inner = y.b;
		innerLen = abs(y.n);
		outer = x.b;
		outerLen = abs(x.n);
	}
	
	sz.n = innerLen + outerLen;
	
	updateWithCare(sz);
	
	// zero out sz
	for(i=0;i<sz.n;i++)
		sz.b[i] = 0;
	
	for(iOuter=0;iOuter<outerLen;iOuter++)
	{
		cOuter = outer[iOuter];
		carry = 0;
		for(iInner=0;iInner<innerLen;iInner++)
		{
			// existing partial sum + block product + carry cannot exceed 64 bits
			sLL = sz.b[iOuter+iInner];
			sLL = sLL + cOuter*inner[iInner] + carry;
			sz.b[iOuter+iInner] = (UINT32)(sLL & 0xFFFFFFFF);
			carry = (sLL>>32);
		}
		sz.b[iOuter+innerLen] = sz.b[iOuter+innerLen] + carry;
	}
	
	// strip leading zero blocks
	while(sz.n>0 && sz.b[sz.n-1]==0)
		sz.n--;
	
	if(!sz.n)
		update(sz);
		
	equate(z, sz);
	
	z.n = sign*abs(z.n);
	
}/* mul */


// z = x*y for an integer y: y is converted to an mb, then the mb form does the work.
void mul(mb& z, const mb& x, INT32 y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	mul(z, x, yAsMb);
	
}/* mul */


// z = x*y for an integer x: x is converted to an mb, then the mb form does the work.
void mul(mb& z, INT32 x, const mb& y)
{
	static mb		xAsMb;
	
	equateWithCare(xAsMb, x);
	mul(z, xAsMb, y);
	
}/* mul */


// z = x*y for a double y: y is converted to an mb, then the mb form does the work.
void mul(mb& z, const mb& x, double y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	mul(z, x, yAsMb);
	
}/* mul */


// z = x*y for a double x: x is converted to an mb, then the mb form does the work.
void mul(mb& z, double x, const mb& y)
{
	static mb		xAsMb;
	
	equateWithCare(xAsMb, x);
	mul(z, xAsMb, y);
	
}/* mul */


// z = x/y when abs(y.n)=1 (single-block divisor); returns the magnitude of the remainder.
// The quotient takes the product of the operand signs.
// If y is not exactly one block, or that block is zero, nothing is computed:
// z is left untouched and 0 is returned.
// A zero x gives z = 0 and remainder 0.
UINT32 mbShortDiv(mb& z, const mb& x, const mb& y)
{
	static mb			s;
	INT32				i, xSign, ySign;
	UINT64				cx, cy, cs, carry;
	UINT64				cr;
	UINT32				remainder;
	
	if(abs(y.n)!=1)
		return 0;
	
	cy = y.b[0];
	
	// a zero block would make the divisions below divide by zero
	if(!cy)
		return 0;
	
	if(x.n>=0)
		xSign = 1;
	else
		xSign = -1;
	
	if(y.n>=0)
		ySign = 1;
	else
		ySign = -1;
	
	s.n = abs(x.n);
	
	updateWithCare(s);
	
	// cr must start at 0 so the remainder is defined when x has no blocks
	cr = 0;
	carry = 0;
	
	// long division from the high block down; the running remainder (always < cy)
	// becomes the high half of the next partial dividend
	for(i=s.n-1;i>=0;i--)
	{
		cx = x.b[i];
		cx = cx + carry;
		cs = cx/cy;
		s.b[i] = (UINT32)(cs & 0xFFFFFFFF);
		cr = cx - cy*cs;
		carry = (cr<<32);
	}
	
	// strip leading zero blocks
	while(s.n>0 && s.b[s.n-1]==0)
		s.n--;
	
	if(!s.n)
		update(s);
		
	equate(z, s);
	
	z.n = xSign*ySign*abs(z.n);
	
	mbNormalize(z);
	
	remainder = (UINT32)cr;
	
	return remainder;
	
}/* mbShortDiv */


// z = x/y for a single unsigned block y; returns remainder.
// y is placed in a one-block static mb that is allocated on the first call
// (the program exits if that allocation fails), then the mb form does the work.
UINT32 mbShortDiv(mb& z, const mb& x, UINT32 y)
{
	static mb			divisor;
	static bool			ready=false;
	
	if(!ready)
	{
		divisor.nn = 1;
		divisor.n = 1;
		divisor.b = (UINT32*)malloc(divisor.nn*sizeof(UINT32));
		if(divisor.b==NULL)
			exit(1);
		ready = true;
	}
	
	divisor.b[0] = y;
	
	return mbShortDiv(z, x, divisor);
	
}/* mbShortDiv */


// z = x/y (quotient of the magnitudes, given the product of the operand signs)
// Returns false only when y is zero; z is then left untouched.
// if abs(y.n) = 1 we do short division, else long division one 32-bit block at a time:
//   - both operands are copied and shifted left so their top bits are set
//   - each quotient block is estimated by ThreeByTwo from the top three blocks of the
//     working dividend and the top two blocks of the divisor
//   - the estimate is decreased until estimate*divisor fits under the dividend,
//     then that product is subtracted from the dividend
//   - finally the quotient is shifted right to undo the difference in alignment shifts
bool div(mb& z, const mb& x, const mb& y)
{
	static mb			b2, b3;
	static mb			xt, yt, s, t, d; // d is working dividend
	static UINT64		mask1; // 0x100000000
	static UINT64		mask2; // 0xFFFFFFFF
	static bool			initGood=false;
	INT32				xSign, ySign;
	INT32				i, ix, iy, is, index;
	UINT64				cd, ct, ry, borrow, carry, sL, sLL;
	UINT64				cs; // was UINT32 - bad!
	INT32				result;
	bool				done, doSubtract;
	INT32				xNumBits, yNumBits, shiftBits;
	
	// one-time setup of the masks and of the fixed-size operands passed to ThreeByTwo
	if(!initGood)
	{
		b2.n = b2.nn = 2;
		b3.n = b3.nn = 3;
		mask2 = 1;
		mask1 = (mask2<<32);
		mask2 = mask1 - 1;
		b2.b = (UINT32*)malloc(b2.nn*sizeof(UINT32));
		if(!b2.b)
			exit(1);
		b3.b = (UINT32*)malloc(b3.nn*sizeof(UINT32));
		if(!b3.b)
			exit(1);
		initGood = true;
	}
	
	if(!y.n)
		return false;
	
	if(!x.n)
	{
		z = 0;
		return true;
	}
	
	if(abs(y.n)==1)
	{
		mbShortDiv(z, x, y);
		return true;
	}
	
	// work on normalized private copies
	equateWithCare(xt, x);
	equateWithCare(yt, y);
	
	mbNormalize(xt);
	mbNormalize(yt);
	
	result = mbCompareAbs(xt, yt);
	
	// abs(x) < abs(y): the quotient is zero
	if(result==-1)
	{
		z = 0;
		return true;
	}
	
	if(x.n>=0)
		xSign = 1;
	else
		xSign = -1;
	
	if(y.n>=0)
		ySign = 1;
	else
		ySign = -1;
	
	// abs(x) == abs(y): the quotient is +1 or -1
	if(result==0)
	{
		z.n = xSign*ySign;
		update(z);
		z.b[0] = 1;
		return true;
	}
	
	xNumBits = alignLeft(xt);
	yNumBits = alignLeft(yt);
	
	shiftBits = xNumBits - yNumBits;
	
	if(shiftBits<0)
	{
		// we need to shift yt right by abs(shiftBits) to avoid negative shiftBits
		mbShiftRight(yt, yt, abs(shiftBits));
		shiftBits = 0;
	}
	
	// load b2 with the top two blocks of the divisor
	b2.b[1] = yt.b[abs(yt.n)-1];
	b2.b[0] = yt.b[abs(yt.n)-2];
	
	// now x>y
	s.n = abs(xt.n) - abs(yt.n) + 1; // quotient
	t.n = abs(yt.n) + 1;             // trial product cs*yt
	d.n = abs(xt.n) + 1;             // working dividend with one extra high block
	
	updateWithCare(s);
	updateWithCare(t);
	updateWithCare(d);
	
	// zero s.b
	for(i=0;i<s.n;i++)
		s.b[i] = 0;
	
	// d=x
	for(ix=0;ix<abs(xt.n);ix++)
		d.b[ix] = xt.b[ix];
	d.b[d.n-1] = 0;
	
	// first estimate from the top three blocks of d
	b3.b[2] = d.b[d.n-1];
	b3.b[1] = d.b[d.n-2];
	b3.b[0] = d.b[d.n-3];
	
	cs = ThreeByTwo(b3, b2);
	
	// is counts down as quotient blocks are produced from the high end;
	// s.n-is is how far the window on d has moved down
	is = s.n;
	for(ix=0;ix<abs(xt.n)-abs(yt.n)+1;ix++)  // was +1
	{
		done = false;
		if(cs)
			while(!done)
			{
				// calc t = cs*y
				carry = 0;
				for(iy=0;iy<abs(yt.n);iy++)
				{
					ry = yt.b[iy];
					sL = cs*ry + carry;  // optimize later
					t.b[iy] = sL & mask2;
					carry = (sL>>32);
				}
				t.b[t.n-1] = carry;
				
				// check if d>=t, comparing from the high block of the current window
				doSubtract = true;
				for(i=t.n-1;i>=0;i--)
				{
					if(d.b[d.n-t.n+i-s.n+is]>t.b[i])
						break;
					
					if(d.b[d.n-t.n+i-s.n+is]<t.b[i])
					{
						doSubtract = false;
						break;
					}
				}
				
				if(doSubtract)
				{
					// subtract t from high part of d, modifying d
					borrow = 0;
					for(i=0;i<t.n;i++)
					{
						index = d.n-t.n+i-s.n+is;
						cd = d.b[index];
						ct = t.b[i];
						sLL = cd - ct - borrow;
						d.b[index] = sLL & mask2;
						borrow = ((sLL & mask1)>>32);
					}
					done = true;
				}
				else
				{
					// estimate was too large: try the next smaller value
					cs--;
					if(!cs)
					{
						done = true;
					}
				}
			} // wend
		
		// insert cs into s
		s.b[--is] = cs;
		
		// estimate the next quotient block unless this was the last one; the guard uses
		// the working copies xt/yt (as the loop bound does) so the window never moves
		// below the bottom of d
		if(ix<abs(xt.n)-abs(yt.n))
		{
			b3.b[2] = d.b[d.n-1-s.n+is];
			b3.b[1] = d.b[d.n-2-s.n+is];
			b3.b[0] = d.b[d.n-3-s.n+is];
			cs = ThreeByTwo(b3, b2);
		}
	}
	
	// we need to shift s right by shiftBits, which we know is positive or zero
	
	if(shiftBits)
		mbShiftRight(s, s, shiftBits);
	
	// strip leading zero blocks
	while(s.n>0 && s.b[s.n-1]==0)
		s.n--;
	
	if(!s.n)
		update(s);
	
	equate(z, s);
	
	z.n = xSign*ySign*abs(z.n);
	
	return true;
	
}/* div */


// z = x/y for an integer y: y is converted to an mb, then the mb form does the work.
// Returns what the mb form returns.
bool div(mb& z, const mb& x, INT32 y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	return div(z, x, yAsMb);
	
}/* div */


// z = x/y for an integer x: x is converted to an mb, then the mb form does the work.
// Returns what the mb form returns.
bool div(mb& z, INT32 x, const mb& y)
{
	static mb		xAsMb;
	
	equateWithCare(xAsMb, x);
	return div(z, xAsMb, y);
	
}/* div */


// z = x/y for a double y: y is converted to an mb, then the mb form does the work.
// Returns what the mb form returns.
bool div(mb& z, const mb& x, double y)
{
	static mb		yAsMb;
	
	equateWithCare(yAsMb, y);
	return div(z, x, yAsMb);
	
}/* div */


// z = x/y for a double x: x is converted to an mb (via equate), then the mb form does the work.
// Returns what the mb form returns.
bool div(mb& z, double x, const mb& y)
{
	static mb		xAsMb;
	
	equate(xAsMb, x);
	return div(z, xAsMb, y);
	
}/* div */


// z = x/y with remainder rem = x - z*y; assume x and y are normalized
// Returns false when y is zero (z and rem untouched) or when div fails.
// if abs(y.n) = 1 we do short division and the remainder takes the sign of x,
// else full division followed by a multiply and subtract
bool divRem(mb& z, mb& rem, const mb& x, const mb& y)
{
	mb			dividend, divisor, quotient;
	
	if(y.n==0)
		return false;
	
	if(x.n==0)
	{
		z.n = 0;
		rem.n = 0;
		return true;
	}
	
	// private copies, so z and rem may be the same objects as x or y
	equate(dividend, x);
	equate(divisor, y);
	
	if(abs(y.n)==1)
	{
		// single-block remainder straight from the short division
		rem.n = 1;
		update(rem);
		rem.b[0] = mbShortDiv(z, dividend, divisor);
		
		if(rem.b[0]==0)
			rem.n = 0;
		
		if(dividend.n<0)
			rem.n = -abs(rem.n);
		
		return true;
	}
	
	if(!div(z, dividend, divisor))
	{
		z.n = 0;
		rem.n = 0;
		return false;
	}
	
	equate(quotient, z);
	
	// rem = x - quotient*y
	mul(rem, quotient, divisor);
	sub(rem, dividend, rem);
	
	mbNormalize(rem);
	
	return true;
	
}/* divRem */


// Quotient and remainder for an integer y: y is converted to an mb, then the mb form does the work.
bool divRem(mb& z, mb& rem, const mb& x, INT32 y)
{
	mb			yAsMb;
	
	equate(yAsMb, y);
	return divRem(z, rem, x, yAsMb);
	
}/* divRem */


// Quotient and remainder for an integer x: x is converted to an mb, then the mb form does the work.
bool divRem(mb& z, mb& rem, INT32 x, const mb& y)
{
	mb			xAsMb;
	
	equate(xAsMb, x);
	return divRem(z, rem, xAsMb, y);
	
}/* divRem */


// Quotient and remainder for a double y: y is converted to an mb, then the mb form does the work.
bool divRem(mb& z, mb& rem, const mb& x, double y)
{
	mb			yAsMb;
		
	equate(yAsMb, y);
	return divRem(z, rem, x, yAsMb);
	
}/* divRem */


// Quotient and remainder for a double x: x is converted to an mb, then the mb form does the work.
bool divRem(mb& z, mb& rem, double x, const mb& y)
{
	mb			xAsMb;
	
	equate(xAsMb, x);
	return divRem(z, rem, xAsMb, y);
	
}/* divRem */


// z = x^2, computed as x multiplied by itself
void sqr(mb& z, const mb& x)
{
	const mb&	factor = x;
	
	mul(z, factor, factor);
	
}/* sqr */


// Returns x^2 by value; convenience form of sqr(z, x).
mb sqr(const mb& x)
{
	mb		result;
	
	sqr(result, x);
	return result;
	
}/* sqr */


// Integer square root of x by Newton's Method: guess = (guess + x/guess)/2.
// Negative x leaves z untouched; zero x gives z = 0.
// The starting guess is x shifted right by half its bit length. Iteration stops
// when the guess repeats, or when it returns to the value from two steps back
// (a two-value cycle), in which case the guess is lowered by one if its square exceeds x.
// At most 10000 iterations are made.
void sqrt(mb& z, const mb& x)
{
	static mb		one;
	static mb		next, current, previous, squared;
	static bool		ready=false;
	INT32			pass, halfBits;
	
	if(!ready)
	{
		init(one, 1);
		ready = true;
	}
	
	if(x.n<0)
		return;
	
	if(x.n==0)
	{
		z = 0;
		return;
	}
	
	// initial guess
	equateWithCare(current, x);
	
	// bit length of x: bits used in the top block plus blockBits for each lower block
	halfBits = NumBits(current.b[current.n-1]);
	if(current.n>1)
		halfBits = halfBits + blockBits*(current.n-1);
	
	halfBits = halfBits/2;
	
	mbShiftRight(current, current, halfBits);
	
	equateWithCare(previous, current);
	
	for(pass=0;pass<10000;pass++)
	{
		// next = (x/current + current)/2
		div(next, x, current);
		add(next, next, current);
		mbShiftRight(next, next, 1);
		
		// converged: the guess no longer changes
		if(compare(next, current)==0)
			break;
		
		// back at the value from two steps ago: settle on the smaller candidate
		if(compare(next, previous)==0)
		{
			mul(squared, next, next);
			if(compare(squared, x)==1)
				sub(next, next, one);
			break;
		}
		
		equateWithCare(previous, current);
		equateWithCare(current, next);
	}
	
	equate(z, next);
	
}/* sqrt */


// Returns the integer square root of x by value; convenience form of sqrt(z, x).
mb sqrt(const mb& x)
{
	mb		root;
	
	sqrt(root, x);
	return root;
	
}/* sqrt */


// z = x^y for an integer exponent, by left-to-right square-and-multiply.
// Special cases, tested in this order:
//   x == 1          : z = 1, returns true
//   y < 0, x != 0   : z = 0, returns true
//   y < 0, x == 0   : z = 0, returns false
//   y == 0          : z = 1, returns false if x == 0, else true
// The exponent bits are walked in an unsigned variable, so moving the leading
// 1 bit into bit 31 never overflows a signed value.
bool power(mb& z, const mb& x, INT32 y)
{
	INT32				i, n;
	UINT32				sy;
	static UINT32		mask; // 0x80000000
	static mb			one;
	mb					zt;
	static bool			initGood=false;
	
	if(!initGood)
	{
		init(one, 1);
		mask = 1;
		mask = (mask<<31); // 0x80000000
		initGood = true;
	}
	
	if(!compare(x, one))
	{
		equate(z, one);
		return true;
	}
	
	if(y<0 && x.n)
	{
		z = 0;
		return true;
	}
	
	if(y<0 && !x.n)
	{
		z = 0;
		return false;
	}
	
	if(y==0)
	{
		equate(z, one);
		if(!x.n)
			return false;
		
		return true;
	}
	
	// now y>0
	equate(zt, one);
	n = NumBits(y);
	
	sy = (UINT32)y;
	sy = (sy<<(32-n));  // go to leading 1 bit
	
	// one squaring per exponent bit, plus a multiply by x when the bit is 1
	for(i=0;i<n;i++)
	{
		mul(zt, zt, zt); // zt^2
		if(sy & mask)
			mul(zt, zt, x);
		sy = (sy<<1);
	}
	
	equate(z, zt);
	
	return true;
	
}/* power */

// z = x^y for an mb exponent, by left-to-right square-and-multiply.
// Special cases, tested in this order:
//   x == 1          : z = 1, returns true
//   x == -1         : z = 1 for even y, -1 for odd y, returns true
//   y < 0, x != 0   : z = 0, returns true
//   y < 0, x == 0   : z = 0, returns false
//   y == 0          : z = 1, returns false if x == 0, else true
//   x == 0, y > 0   : z = 0, returns true
bool power(mb& z, const mb& x, const mb& y)
{
    mb      zt;
    int     bitIndex;
    // read the operands before z is written, since z may be the same object as x
    const bool  xIsZero = (x.n==0);
    const bool  xIsNegative = (x.n<0);
    const bool  yIsOdd = (y.n!=0 && (y.b[0] & 1));

    if(abs(x)==1)
    {
        // the magnitude stays 1; only an odd power of -1 is negative
        if(xIsNegative && yIsOdd)
            z = x;
        else
            z = 1;
        return true;
    }
    
    if(y.n<0)
    {
        z = 0;
        return !xIsZero;
    }
    
    if(y.n==0)
    {
        z = 1;
        return !xIsZero;
    }
    
    // now y > 0
    
    if(xIsZero)
    {
        z = 0;
        return true;
    }
    
    zt = 1;
    bitIndex = NumBits(y) - 1; // points to highest 1 bit
    while(bitIndex>=0)
    {
        // one squaring per exponent bit, plus a multiply by x when the bit is 1
        mul(zt, zt, zt);
        if(mbGetBit(y, bitIndex--))
            mul(zt, zt, x);
    }
    
    z = zt;
    return true;
	
}/* power */

// z = x^y mod p, using left-to-right binary exponentiation with a reduction after every step.
//   x == 1          : z = 1 % p, returns true
//   x == -1         : z = 1 % p for even y, (-1) % p for odd y, returns true
//   y < 0, x != 0   : z = 0, returns true (no modular inverse is computed)
//   y < 0, x == 0   : z = 0, returns false
//   y == 0          : z = 1, returns false if x is 0, true otherwise
//   x == 0, y > 0   : z = 0, returns true
bool powerMod(mb& z, const mb& x, const mb& y, const mb& p)
{
    mb      zt;
    int     bitIndex;
    bool    xNonZero;
    
    if(abs(x)==1)
    {
        // the sign of (-1)^y depends only on the parity of y (bit 0 of its magnitude)
        if(x.n<0 && mbGetBit(y, 0))
            z = x % p;
        else
        {
            zt = 1;
            z = zt % p;
        }
        return true;
    }
    
    // remember this before z is written, in case z and x are the same object
    xNonZero = (x.n!=0);
    
    if(y.n<0)
    {
        z = 0;
        return xNonZero;    // 0 to a negative power is an error
    }
    
    if(!y.n)
    {
        z = 1;
        return xNonZero;    // 0^0 is flagged as an error
    }
    
    // now y > 0
    
    if(!xNonZero)
    {
        z = 0;
        return true;
    }
    
    zt = 1;
    bitIndex = NumBits(y) - 1; // points to highest 1 bit
    while(bitIndex>=0)
    {
        zt = (zt * zt) % p;
        if(mbGetBit(y, bitIndex--))
            zt = (zt * x) % p;
    }
    
    z = zt;
    return true;
    
}/* powerMod */


// value-returning form of power for an INT32 exponent; the success flag of the
// three-argument power is discarded
mb power(const mb& x, INT32 y)
{
	mb		result;
	
	power(result, x, y);
	return result;
	
}/* power */


// value-returning form of power for an mb exponent; the success flag of the
// three-argument power is discarded
mb power(const mb& x, const mb& y)
{
	mb		result;
	
	power(result, x, y);
	return result;
	
}/* power */

// value-returning form of powerMod; the success flag of the four-argument
// powerMod is discarded
mb powerMod(const mb& x, const mb& y, const mb& p)
{
    mb        result;
    
    powerMod(result, x, y, p);
    return result;
    
}/* powerMod */

// z1 = x^(y^z) mod p
// computed as x^d mod p with the exponent reduced first: d = y^z mod phi(p)
// NOTE(review): reducing the exponent modulo phi(p) relies on Euler's theorem,
// which requires gcd(x, p) == 1 -- confirm that callers guarantee this.
void powPowMod(mb& z1, const mb& x, const mb& y, const mb& z, const mb& p)
{
    mb      exponent, totient, result;
    
    totient = phi(p);
    
    exponent = powerMod(y, z, totient);
    if(exponent.n<0)
        exponent = exponent + totient;  // bring a negative remainder back above zero
    
    result = powerMod(x, exponent, p);
    z1 = result;
    
}/* powPowMod */

// value-returning form of powPowMod: returns x^(y^z) mod p
mb powPowMod(const mb& x, const mb& y, const mb& z, const mb& p)
{
    mb      result;
    
    powPowMod(result, x, y, z, p);
    return result;
    
}/* powPowMod */



// number of bits in x: the bit count of the highest block plus blockBits for
// every block below it; the sign of x.n is ignored and zero is reported as 1 bit
INT32 NumBits(const mb& x)
{
	if(x.n==0)
		return 1;
	
	const INT32		blocks = abs(x.n);
	INT32			bits = NumBits(x.b[blocks-1]);
	
	if(blocks>1)
		bits += blockBits*(blocks-1);
	
	return bits;
	
}/* NumBits */


// approximate log to base 2 of x, x can be negative; it is treated as positive
// The result is the position (counting from 0) of the highest set bit of |x|,
// found by counting the leading zero bits of the top block.
// x == 0 has no logarithm; 0 is returned for it, which matches NumBits(0) - 1.
INT32 Lg2(const mb& x)
{
	INT32			i, numBlocks, numZeroBits;
	UINT32			cs, mask;
	
	// a zero value has no blocks, so there is no top block to inspect
	if(!x.n)
		return 0;
	
	mask = 1;
	mask = (mask<<31); // 0x80000000, built by shifting an unsigned 1
	
	// only the block count is needed, so x is read in place instead of being copied
	numBlocks = abs(x.n);
	
	// count leading zero bits of the high block
	numZeroBits = 0;
	cs = x.b[numBlocks-1];
	for(i=0;i<blockBits;i++)
	{
		if(cs & mask)
			break;
		
		numZeroBits++;
		cs = (cs<<1);
	}
	
	return blockBits*(numBlocks-1) + blockBits - numZeroBits - 1;
	
}/* Lg2 */


void log(double& z, const mb& x)
{
	fp				xt, zt;
	
	if(x.n<=0)
		return;
	
	equate(xt, x);
	log(zt, xt);
	equate(z, zt);
	
}/* log */


// value-returning form of log
// The two-argument log does not write its output when x <= 0, so the result is
// initialised here; 0 is returned for such x instead of an indeterminate value.
double log(const mb& x)
{
	double		z = 0.0;
	
	log(z, x);
	
	return z;
	
}/* log */

// greatest common divisor, by the Euclidean algorithm on |x| and |y|
// gcd(x, 0) = |x| and gcd(0, 0) = 0
void gcd(mb& z, const mb& x, const mb& y)
{
	mb				u, v, r, q;
	
	// work on copies so the signs can be dropped (and so z may be x or y)
	equate(u, x);
	equate(v, y);
	
	if(u.n<0)
		u.n = -u.n;
	
	if(v.n<0)
		v.n = -v.n;
	
	// (u, v) <- (v, u % v) until the remainder vanishes; when y is 0 the loop
	// does not run at all and the answer is simply |x|
	while(v.n)
	{
		divRem(q, r, u, v); // r = u % v
		equate(u, v);
		equate(v, r);
	}
	
	z = u;
		
}/* gcd */


// value-returning form of gcd
mb gcd(const mb& x, const mb& y)
{
	mb		divisor;
	
	gcd(divisor, x, y);
	return divisor;
	
}/* gcd */


// LCM of two integers x and y is the smallest positive integer that is divisible by both x and y
// computed as |x*y| / gcd(x, y); if either operand is 0 the result is 0
void lcm(mb& z, const mb& x, const mb& y)
{
    mb      zt;
    
    // with a zero operand the product is 0 and the quotient below could end up
    // dividing by a zero gcd, so answer directly
    if(!x.n || !y.n)
    {
        z = 0;
        return;
    }
    
    zt = abs(x*y) / gcd(x, y);
    
    z = zt;
    
}/* lcm */

// value-returning form of lcm
mb lcm(const mb& x, const mb& y)
{
    mb  multiple;
    
    lcm(multiple, x, y);
    return multiple;
    
}/* lcm */

// Extended Euclidean algorithm, iterative form
// (see http://en.wikipedia.org/wiki/Extended_Euclidean_algorithm)
// finds x, y and gcd such that a*x + b*y = gcd(a, b)
void extendedGCD(mb& x, mb& y, mb& gcd, const mb& a, const mb& b)
{
	mb		r0, r1, xPrev, yPrev, quot, saved;
	
	r0 = a;
	r1 = b;
	xPrev = 1;
	x = 0;
	yPrev = 0;
	y = 1;
	
	while(r1.n!=0)
	{
		quot = r0/r1;
		
		// (r0, r1) <- (r1, result of myModulus on r0 and r1)
		// presumably myModulus stores r0 mod r1 in its first argument -- it is defined elsewhere
		saved = r1;
		myModulus(r1, r0, r1);
		r0 = saved;
		
		// (xPrev, x) <- (x, xPrev - quot*x)
		saved = x;
		x = xPrev - quot*x;
		xPrev = saved;
		
		// (yPrev, y) <- (y, yPrev - quot*y)
		saved = y;
		y = yPrev - quot*y;
		yPrev = saved;
	}
	
	// the coefficients from the step before the remainder vanished are the answer
	x = xPrev;
	y = yPrev;
	gcd = r0;
	
}/* extendedGCD */


// z = integer n-th root of x; the root of |x| is computed and the sign of x is restored at the end.
//   n <= 0             : z.n is set to 0
//   n == 1             : z = x
//   x == 0             : z = 0
//   n even and x < 0   : z = 0 (error in input should be caught by caller)
//   |x| == 1           : z = x
//   |x| < 2^n          : z = 1 with the sign of x
// Otherwise Newton's iteration zt = zt + (|x| - zt^n)/(n*zt^(n-1)) is run until zt stops
// changing, and zt is then stepped by one until zt^n <= |x| < (zt+1)^n.
void nthRoot(mb& z, const mb& x, INT32 n)
{
	static mb		one, two;
	mb				xt, zt, s, s1;
	static bool		initGood=false;
	bool			isNegative;
	INT32			result;
	
	if(!initGood)
	{
		init(one, 1);
		init(two, 2);
		initGood = true;
	}
	
	if(n<=0)
	{
		z.n = 0;
		return;
	}
	
	if(n==1)
	{
		equate(z, x);
		return;
	}
	
	// the root of zero is zero; without this check zero would fall into the
	// |x| < 2^n shortcut below and come back as 1
	if(!x.n)
	{
		z = 0;
		return;
	}
	
	equate(xt, x);
	
	if(xt.n<0)
	{
		xt.n = -xt.n;
		isNegative = true;
	}
	else
		isNegative = false;
	   
	if(2*(n/2)==n && isNegative)
	{
		z = 0; // error in input should be caught by caller
		return;
	}
	
	if(!compare(xt, one))
	{
		equate(z, x);
		return;
	}
	
	power(s, two, n);
	if(compare(xt, s)==-1)
	{
		equate(z, one);
		if(isNegative)
			z.n = -z.n;
		return;
	}
	
	// initial guess
	power(zt, two, Lg2(xt)/n);
	// s1=n*zt^(n-1); s=zt^n
	for(;;)
	{
		power(s1, zt, n-1);
		mul(s, s1, zt);
		mul(s1, s1, n);
		sub(s, xt, s);
		div(s, s, s1);
		add(s, s, zt);
		if(!compare(s, zt))
			break;
		equate(zt, s);
	}
	
	// the iteration can stop slightly off; walk zt to the largest value with zt^n <= xt
	power(s, zt, n);
	result = compare(s, xt);
	if(result==1)
		while(result==1)
		{
			sub(zt, zt, one);
			power(s, zt, n);
			result = compare(s, xt);
		}
	else
		if(result==-1)
		{
			while(result==-1)
			{
				add(zt, zt, one);
				power(s, zt, n);
				result = compare(s, xt);
			}
			if(result==1)
				sub(zt, zt, one);
		}
	
	equate(z, zt);
	if(isNegative)
		z.n = -z.n;
	
}/* nthRoot */


// value-returning form of nthRoot
mb nthRoot(const mb& x, INT32 n)
{
	mb		root;
	
	nthRoot(root, x, n);
	return root;
	
}/* nthRoot */


// z = x!, formed as the product x*(x-1)*...*1
//   x < 0  : returns immediately and leaves z untouched
//   x == 0 : z = 1
void factorial(mb& z, const mb& x)
{
	static mb		one;
	mb				countdown;
	static bool		initGood=false;
	
	if(!initGood)
	{
		init(one, 1);
		initGood = true;
	}
	
	if(x.n<0)
		return;
	
	if(x.n==0)
	{
		equate(z, one);
		return;
	}
	
	// copy x before z is written
	equate(countdown, x);
	
	// multiply the running product by countdown, countdown-1, ... until it reaches 0
	for(equate(z, one); countdown.n!=0; sub(countdown, countdown, one))
		mul(z, z, countdown);
	
}/* factorial */


// value-returning form of factorial; for x < 0 the two-argument factorial does
// not write its output, so the default-constructed mb is returned unchanged
mb factorial(const mb& x)
{
	mb		product;
	
	factorial(product, x);
	return product;
	
}/* factorial */


/*
 binomial computes the binomial coefficient C(n, r) = n!/(r!(n-r)!)

 It works from C(n, r) = n(n-1)...(n-r+1)/r!, which has r terms above and below, and
 evaluates it as [{[(n-r+1)(n-r+2)]/2}(n-r+3)]/3 and so on, so every intermediate
 value is an integer.

 Because C(n, r) = C(n, n-r), the smaller of r and n-r is used as the number of steps.

 Returns false with z = 0 when n < 0, r < 0 or n < r; otherwise returns true.
*/
bool binomial(mb& z, const mb& n, const mb& r)
{
	static mb		one;
	mb				k, steps, other, term, acc;
	static bool		initGood=false;
	
	if(!initGood)
	{
		init(one, 1);
		initGood = true;
	}
	
	// reject negative arguments and n < r
	if(n.n<0 || r.n<0 || compare(n, r)==-1)
	{
		z = 0;
		return false;
	}
	
	// C(n, n) = C(n, 0) = 1
	if(compare(n, r)==0 || r.n==0)
	{
		equate(z, one);
		return true;
	}
	
	// steps = min(r, n-r)
	equate(steps, r);
	sub(other, n, r);
	if(compare(other, steps)==-1)
		equate(steps, other);
	
	sub(term, n, steps);	// one below the first numerator factor
	equate(acc, one);
	
	// for k = 1..steps: acc = acc*(n-steps+k)/k
	for(equate(k, one); compare(steps, k)>=0; add(k, k, one))
	{
		add(term, term, one);
		mul(acc, acc, term);
		div(acc, acc, k);
	}
	
	equate(z, acc);
	
	return true;
	
}/* binomial */


// value-returning form of binomial(mb, mb); returns 0 when the arguments are rejected
mb binomial(const mb& n, const mb& r)
{
	mb		coeff;
	
	if(!binomial(coeff, n, r))
		coeff = 0;
	
	return coeff;
	
}/* binomial */


// binomial with an INT32 r: r is converted to an mb and the mb/mb version does the work
bool binomial(mb& z, const mb& n, INT32 r)
{
	mb			rAsMb;
	
	equate(rAsMb, r);
	const bool	ok = binomial(z, n, rAsMb);
	
	return ok;

}/* binomial */


// value-returning form of binomial(mb, INT32); returns 0 when the arguments are rejected
mb binomial(const mb& n, INT32 r)
{
	mb		coeff;
	
	if(!binomial(coeff, n, r))
		coeff = 0;
	
	return coeff;
	
}/* binomial */


// binomial with an INT32 n: n is converted to an mb and the mb/mb version does the work
bool binomial(mb& z, INT32 n, const mb& r)
{
	mb			nAsMb;
	
	equate(nAsMb, n);
	const bool	ok = binomial(z, nAsMb, r);
	
	return ok;
	
}/* binomial */


// value-returning form of binomial(INT32, mb); returns 0 when the arguments are rejected
mb binomial(INT32 n, const mb& r)
{
	mb		coeff;
	
	if(!binomial(coeff, n, r))
		coeff = 0;
	
	return coeff;
	
}/* binomial */


// binomial with INT32 n and r: both are converted to mb and the mb/mb version does the work
bool binomial(mb& z, INT32 n, INT32 r)
{
	mb			nAsMb, rAsMb;
	
	equate(nAsMb, n);
	equate(rAsMb, r);
	const bool	ok = binomial(z, nAsMb, rAsMb);
	
	return ok;
	
}/* binomial */


// value-returning form of binomial(INT32, INT32); returns 0 when the arguments are rejected
mb binomial(INT32 n, INT32 r)
{
	mb		coeff;
	
	if(!binomial(coeff, n, r))
		coeff = 0;
	
	return coeff;
	
}/* binomial */

// Pollard's rho algorithm
// sets factor to a factor of n; a factor that fails isItPrime(factor, 35) is
// itself split again, so the search repeats until the factor passes that test
// NOTE(review): there is no exit for an n that has no proper factor -- callers
// appear to be expected to pass a composite n; confirm.
void rho(mb& factor, const mb& n)
{
    mb      x, xf, nt, aSeed;
    int     count, cycleSize;
    
    nt = n;
    InitArray(0);   // presumably seeds the random generator -- defined elsewhere
    
    for(;;)
    {
        // factor == nt marks "nothing found yet"; retry with a fresh constant each time
        factor = nt;
        while(factor==nt)
        {
            mbRandom(aSeed, nt);
            x = 2;
            xf = 2;
            cycleSize = 2;
            factor = 1;
            while(factor==1)
            {
                // iterate x -> x*x + aSeed (mod nt) against the fixed value xf
                count = 1;
                while(count<=cycleSize && factor<=1)
                {
                    x = (x*x+aSeed) % nt;
                    if(x<0.)
                        x = x + nt;
                    factor = gcd(x-xf, nt);
                    ++count;
                }
                cycleSize = cycleSize*2;
                if(cycleSize>10000000)
                    mbRandom(aSeed, nt);
                xf = x;
            }
        }
        
        if(isItPrime(factor, 35))
            break;
        
        // the factor found is not prime: look for a factor of it instead
        nt = factor;
    }
    
}/* rho */

// value-returning form of rho: returns the factor of n found by rho(factor, n)
mb rho(const mb& n)
{
    mb  found;
    
    rho(found, n);
    return found;
    
}/* rho */

// finds factors of n
// On return z and power are malloc'ed arrays of numFactors entries each, with z in
// ascending order and n equal to the product of z[i]^power[i].
// The factor 2 appears once with its full exponent. Every factor found by rho() is
// stored as its own entry with exponent 1, so a repeated odd prime appears as
// repeated entries in z.
// For n == 0 nothing is allocated: numFactors = 0 and both pointers are null.
void findFactors(mb*& z, int*& power, int& numFactors, const mb& n)
{
    int     i, exp2;
    mb      factor, nt, n1, two;
    mb      zt[1000];
    int     powert[1000];
    
    for(i=0;i<1000;++i)
        init(zt[i]);
    
    // zero has no factorization, and the power-of-2 stripping loop below would
    // never terminate on it (0 == 0*2 stays true forever)
    if(!n.n)
    {
        numFactors = 0;
        z = 0;
        power = 0;
        return;
    }
    
    if(isItPrime(n,30))
    {
        numFactors = 1;
        z = (mb*)malloc(numFactors*sizeof(mb));
        init(z[0]);
        power = (int*)malloc(numFactors*sizeof(int));
        z[0] = n;
        power[0] = 1;
        return;
    }
    
    nt = n;
    
    // Pollard's rho algorithm has problem with powers of 2
    two = 2;
    exp2 = 0;
    n1 = nt / two;
    while(nt==n1*two)
    {
        exp2++;
        nt = n1;
        n1 = nt / two;
    }
    
    if(nt==1)
    {
        numFactors = 1;
        z = (mb*)malloc(numFactors*sizeof(mb));
        init(z[0]);
        power = (int*)malloc(numFactors*sizeof(int));
        z[0] = 2;
        power[0] = exp2;
        return;
    }
    
    i = 0;
    if(nt!=n)
    {
        zt[i] = 2;
        powert[i++] = exp2;
    }
    
    // peel factors off the remaining odd part one at a time
    while(!isItPrime(nt, 35) && nt!=1)
    {
        rho(factor, nt);
        zt[i] = factor;
        powert[i++] = 1;
        nt = nt / factor;
    }
    zt[i] = nt;
    powert[i++] = 1;    // i now has number of factors
    numFactors = i;
    z = (mb*)malloc(numFactors*sizeof(mb));
    power = (int*)malloc(numFactors*sizeof(int));
    
    // Only zt is sorted. powert still lines up afterwards: 2, when present, is the
    // smallest entry and so stays at index 0, and every other exponent is 1.
    qsort(zt, numFactors, sizeof(mb), comparmb);
    
    for(i=0;i<numFactors;++i)
    {
        init(z[i]);
        z[i] = zt[i];
        power[i] = powert[i];
    }
    
}/* findFactors */

int comparmb(const void* a, const void* b)
{
    if ( *(mb*)a <  *(mb*)b ) return -1;
    if ( *(mb*)a == *(mb*)b ) return 0;
    //if ( *(mb*)a >  *(mb*)b ) return 1;
    return 1;
    
}/* comparmb */

