Short Notes: March 2010

Some time ago in the blog, there was a note about SQLite and my support and willingness to contribute source code back to the project. As a first step, decided to post small code snippets directly in the blog every other week. Will figure if anything else can be done with them later. This is the first installment...

Every once in a while, one has to extract a piece of information, dependent upon a related field in the record. Consider for instance a web page title. It changes overtime. Not necessarily every single day, but it does change. Assuming we want to go back in time, it is practical to keep note of the new title once it changes. This leaves us with holes in the daily/hourly history. We end up with a slowly changing dimension. The slowly changing dimensions are long recognized. There is standard ways of dealing with them, but for the purpose of this note, let's assume, we do not want to define and maintain a date range on the record itself, or even better, assume for whatever reason, we want to get the longest ever tile. Then we are left with no choice, but to write a SQL query that goes against the history table twice: once fetching the last known title change record prior to the date in question (record with longest title); then in a second step, using this record identifier (and/or date if part of a unique index), we get the title. Assuming [title] table with [url], [date] and title [value] field, something in the lines of

select [t].[value]

from [title] [t] inner join

( select [url], min([date]) [date]

from [title] where [url] =<> and [date] <= <> group by [url] ) [f] on [f].[url] = [t].[url] and [f].[date] = [t].[date]

select max( [t].[value] ) // there may be two or more titles with the same length

from [title] [t] inner join

( select [url], max( length( [value] ) ) [len]

from [title] where [url] =<> group by [url] ) [f]

on [f].[url] = [t].[url] and [f].[len] = length( [t].[value] )

Not too difficult. Not pretty looking, though. Still when dealing with small lists (and the title change for a particular url over a week or month period is a small list), there might be an easier way if we were given min/max aggregation functions that operate on a condition field and return a dependent value from the record pinpointed by the condition. So, I and up writing SQLite min_indexed and max_indexed extension functions. With them, one constructs the following to get the title as of given date

select max_indexed( [date], [value] ) from [title] where [url] = <> and [date] <= <>

select max_indexed( length( [value] ), [value] ) from [title] where [url] = <>

to get the longest title. Simple.

Once more, a thing to consider is the amount of data the aggregation iterates through. The more complex two step query will give better performance in case of a large dataset and suitable indices defined. But if you have no good index on the condition, you may be better off choosing the simple min/max indexed query. It is easier to read and maintain.

Hope you find other interesting uses for the min/max indexed. Just post a note if you do. Happy coding!

Oh, yeah.. The code snippet follows:

extern const sqlite3_api_routines * sqlite3_api;

typedef struct minmaxAggData {

int flag;

int size;

union {

void* buffer; // allocated by sqlite3_malloc

sqlite_int64 i;

double d;

};

} minmaxAggData;

static int compareValue( sqlite3_value * val, minmaxAggData * base ) {

int valType;

assert( val );

assert( base );

valType = sqlite3_value_type( val );

if ( ( base->flag == SQLITE_INTEGER ) || ( base->flag == SQLITE_FLOAT ) ) {

int conversionAttempted = 0;

integerCompare:

switch( valType ) {

case SQLITE_INTEGER:

case SQLITE_FLOAT:

// direct compare possible

if ( ( base->flag == SQLITE_INTEGER ) && ( valType == SQLITE_INTEGER ) ) {

sqlite_int64 i = sqlite3_value_int64( val );

return ( base->i == i ) ? 0 : ( ( base->i > i ) ? -1: 1 );

}

{

double d1 = ( base->flag == SQLITE_INTEGER ) ? ( ( double ) base->i ) : base->d ;

double d2 = sqlite3_value_double( val );

return ( d1 == d2 ) ? 0 : ( ( d1 > d2 ) ? -1: 1 );

}

break;

case SQLITE_NULL:

return -1;

break;

default:

if ( !conversionAttempted ) {

valType = sqlite3_value_numeric_type( val );

conversionAttempted = 1;

goto integerCompare;

}

else {

return -2;

}

break;

}

else if ( ( base->flag == SQLITE_BLOB ) || ( base->flag == SQLITE_TEXT ) ) {

int b1 = base->size, b2 = sqlite3_value_bytes( val ), result = 0;

const void * v2 = ( base->flag == SQLITE_BLOB ) ? sqlite3_value_blob( val ) : sqlite3_value_text( val );

if ( v2 ) result = memcmp( v2 , base->buffer, ( b1 <>

if ( ( result == 0 ) && ( b1 != b2 ) ) { result = ( b1 > b2 ) ? 1: -1; }

return result;

}

else if ( ( base->flag == SQLITE_NULL ) || ( base->flag == 0 ) ) {

return ( valType != SQLITE_NULL ) ? 1 : 0;

}

else {

assert( 0 && "unknown datatype" );

return -2;

}

static void releaseValue( minmaxAggData * dest) {

assert( dest );

// need to free the buffer

if ( ( dest->flag == SQLITE_TEXT ) || ( dest->flag == SQLITE_BLOB ) ) {

if ( dest->buffer ) {

sqlite3_free( dest->buffer ); } }

memset( dest, 0, sizeof ( minmaxAggData ) );

}

static int copyValue( sqlite3_value * val, minmaxAggData * dest ) {

int valType;

assert( val );

assert( dest );

valType = sqlite3_value_type( val );

releaseValue( dest );

switch( valType ) {

case SQLITE_INTEGER:

dest->i = sqlite3_value_int64( val );

break;

case SQLITE_FLOAT:

dest->d = sqlite3_value_double( val );

break;

case SQLITE_BLOB:

case SQLITE_TEXT:

{

int sz = sqlite3_value_bytes( val );

const void * src = ( valType == SQLITE_TEXT ) ? sqlite3_value_text( val ): sqlite3_value_blob( val );

if ( ( sz ) && ( src ) ) {

if ( ( dest->buffer = sqlite3_malloc( sz + 1 ) ) != 0 ) {

memmove( dest->buffer, src, ( size_t ) sz );

*( ( char* ) dest->buffer + sz ) = 0; // make sure the string is zero terminated

dest->size = sz; } }

}

break;

case SQLITE_NULL:

break;

case 0: // initialized or empty

break;

default:

assert( 0 && "unknown or insupported data type" );

return 0;

}

dest->flag = valType;

return 1;

}

// ---

// Implementation of the min_indexed(), max_indexed()

// the call syntax from sql

// min_indexed( , )

static void minmaxStep( sqlite3_context *context, int argc, sqlite3_value **argv, int max ) {

minmaxAggData* pBest;

// validate the parameter count

if ( argc != 2 ) {

sqlite3_result_error( context, "exactly two parameters expected by the indexed min/max functions.", -1);

return; }

if( sqlite3_value_type( argv[0] )==SQLITE_NULL ) return;

pBest = ( minmaxAggData* ) sqlite3_aggregate_context( context, 2 * sizeof( *pBest ) );

if ( !pBest ) return;

if ( pBest[0].flag ) {

switch( compareValue( argv[0], &pBest[0] ) ) {

case 1:

if ( max ) {

copyValue( argv[0], &pBest[0] );

copyValue( argv[1], &pBest[1] );

}

break;

case -1:

if ( !max ) {

copyValue( argv[0], &pBest[0] );

copyValue( argv[1], &pBest[1] );

}

break;

default:

// no action

break;

}

else {

// the best value to compare against

copyValue( argv[0], &pBest[0] );

// the value to return

copyValue( argv[1], &pBest[1] );

}

void minStep( sqlite3_context *context, int argc, sqlite3_value **argv ) {

minmaxStep( context, argc, argv, 0 ); }

void maxStep( sqlite3_context *context, int argc, sqlite3_value **argv ) {

minmaxStep( context, argc, argv, 1 ); }

void minmaxFinalize( sqlite3_context *context ) {

minmaxAggData* pBest;

pBest = ( minmaxAggData* ) sqlite3_aggregate_context( context, 2 * sizeof( *pBest ) );

if ( !pBest ) {

sqlite3_result_null( context ); return ;}

releaseValue( &pBest[0] );

switch( pBest[1].flag ) {

case SQLITE_INTEGER: sqlite3_result_int64( context, pBest[1].i ); break;

case SQLITE_FLOAT: sqlite3_result_double( context, pBest[1].d ); break;

case SQLITE_BLOB: sqlite3_result_blob( context, pBest[1].buffer, pBest[1].size, sqlite3_free ); break;

case SQLITE_TEXT: sqlite3_result_text( context, ( char * ) pBest[1].buffer, pBest[1].size, sqlite3_free ); break;

default: sqlite3_result_null( context ) ;break;

}

// no need to release the return value. sqlite will take care of that

}

Short Notes

Tuesday, March 23, 2010

Up to the promise given long time ago..

Monday, March 8, 2010

Beautiful and entrancing new music game for iPhone

Wednesday, March 3, 2010

The Startup Expenses and the link to Taxes

Facebook Badge

Followers

Blog Archive

About Me