f89a906b46c4848dee65ca534ad0962531dbb2e6
galt
  Thu Apr 25 00:12:09 2024 -0700
Fix for hgcentralTidy cleaner, handle unsigned int ID over 2 billion correctly. This will work until we someday exceed the 4 billion sessions which make the maximum the field can take. I did not want to work, but the failures were too important to ignore. If the tables are not cleaned regularly, they grow very fast indeed. I have manually run the purge to catch up. We should be good now.

diff --git src/hg/hgcentralTidy/hgcentralTidy.c src/hg/hgcentralTidy/hgcentralTidy.c
index eda46e9..fcd1d51 100644
--- src/hg/hgcentralTidy/hgcentralTidy.c
+++ src/hg/hgcentralTidy/hgcentralTidy.c
@@ -1,575 +1,576 @@
 /* Copyright (C) 2013 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 
 /* hgcentralTidy - Clean out old cart records from userDb and sessionDb tables 
  * in an hgcentral db by processing it in chunks so that it 
  * won't lock out other cgi processes that are running.
  * Also, check that maximum table size has not been exceeded,
  * and send warning to cluster-admin if it has.
  */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "dystring.h"
 #include "options.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "hgConfig.h"
 
 char *database = NULL;
 char *host     = NULL;
 char *user     = NULL;
 char *password = NULL;
 
 struct sqlConnection *conn = NULL;
 
 
 int chunkSize = 1000;
 int chunkWait = 0;
 int squealSize = 20;  /* complain if table data_length is bigger than squealSize GB */
  // was 14 until 2018-06-02  
 
 int purgeStart = -1;  /* manual specify purge range in days ago */
 int purgeEnd = -1;
 char *purgeTable = NULL;  /* optionally specify one table to purge */
 
 char *sessionDbTableName = "sessionDb";
 char *userDbTableName = "userDb";
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "hgcentralTidy - Clean out old carts in hgcentral without blocking cart use\n"
   "usage:\n"
   "   hgcentralTidy config\n"
   "options:\n"
   "   -chunkSize=N - number of rows to examine in one chunk, default %d\n"
   "   -chunkWait=N - sleep interval between chunks to allow other processing, default %d'\n"
   "   -squealSize=N - email warning to cluster-admin when this size in GB is exceeded, default %d'\n"
   "   -purgeStart=N - purge range starts N days ago'\n"
   "   -purgeEnd=N - purge range end N days ago'\n"
   "   -purgeTable=tableName - optional purge table must be userDb or sessionDb. If not specified, both tables are purged.'\n"
   "   -dryRun - option that causes it to skip the call to cleanTableSection.'\n"
   , chunkSize
   , chunkWait
   , squealSize
   );
 }
 
 
 static struct optionSpec options[] = {
    {"chunkSize", OPTION_INT},
    {"chunkWait", OPTION_INT},
    {"squealSize", OPTION_INT},
    {"purgeStart", OPTION_INT},
    {"purgeEnd", OPTION_INT},
    {"purgeTable", OPTION_STRING},
    {"dryRun", OPTION_STRING},
    {NULL, 0},
 };
 
 char *getCfgOption(char *config, char *setting)
 /* get setting for specified config */
 {
 char temp[256];
 safef(temp, sizeof(temp), "%s.%s", config, setting);
 char *value = cfgOption(temp);
 if (!value)
     errAbort("setting %s not found!",temp);
 return value;
 }
 
 
 boolean checkMaxTableSizeExceeded(char *table)
 /* check if max table size has been exceeded, send email warning if so */
 {
 boolean squealed = FALSE;
 long long dataLength = 0;
 long long dataFree = 0;
 struct sqlResult *sr;
 char **row;
 char query[256];
 sqlSafef(query, sizeof(query), "show table status like '%s'", table );
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
 if (!row)
     errAbort("error fetching table status");
 int dlField = sqlFieldColumn(sr, "Data_length");
 if (dlField == -1)
     errAbort("error finding field 'Data_length' in show table status resultset");
 dataLength = sqlLongLong(row[dlField]);
 int dfField = sqlFieldColumn(sr, "Data_free");
 if (dfField == -1)
     errAbort("error finding field 'Data_free' in show table status resultset");
 dataFree = sqlLongLong(row[dfField]);
 verbose(1, "%s: Data_length=%lld Data_free=%lld\n\n", table, dataLength, dataFree);
 if ((dataLength / (1024 * 1024 * 1024)) >= squealSize)
     {
     char msg[256];
     char cmdLine[256];
     char *emailList = "cluster-admin@soe.ucsc.edu galt@soe.ucsc.edu clayfischer@ucsc.edu";
     safef(msg, sizeof(msg), "BIG HGCENTRAL TABLE %s data_length: %lld data_free: %lld\n"
 	, table, dataLength, dataFree);
     printf("%s", msg);
     safef(cmdLine, sizeof(cmdLine), 
 	"echo '%s'|mail -s 'WARNING hgcentral cleanup detected data_length max size %d GB exceeded' %s"
 	, msg
 	, squealSize
 	, emailList
 	);
     system(cmdLine);
     squealed = TRUE;
 
     }
 sqlFreeResult(&sr);
 return squealed;
 }
 
 
-int toDaysAgo(char *useString, int id)
+int toDaysAgo(char *useString, unsigned int id)
 /* Convert mysql datetime into days ago */
 {
 struct tm tmUse;
 zeroBytes(&tmUse, sizeof(struct tm));
 if (!strptime(useString, "%Y-%m-%d %H:%M:%S", &tmUse))
-    errAbort("strptime failed for firstUse %s (id=%d)", useString, id);
+    errAbort("strptime failed for firstUse %s (id=%u)", useString, id);
 
 time_t use, now;
 now = time(NULL);
 use = mktime(&tmUse);
 return difftime(now, use) / (60*60*24); 
 }
 
-void cleanTableSection(char *table, int startId, int endId)
+void cleanTableSection(char *table, unsigned int startId, unsigned int endId)
 /* clean a specific table section */
 {
 struct sqlResult *sr;
 char **row;
 char query[256];
 int rc = 0;
 int dc = 0;
 int delCount = 0;
 int count = 0;
-int maxId = startId - 1;
+unsigned int maxId = startId - 1;
+if (startId == 0) maxId = 0;
 int useCount = 0;
 boolean	deleteThis = FALSE;
 int delRobotCount = 0;
 int oldRecCount = 0;
-struct slInt *delList = NULL;
+struct slUnsigned *delList = NULL;
 time_t cleanSectionStart = time(NULL);
 
 struct dyString *dy = dyStringNew(0);
 
 while(TRUE)
     {
-    verbose(2, "maxId: %d   count=%d  delCount=%d   dc=%d\n", maxId, count, delCount, dc);
+    verbose(2, "maxId: %u   count=%d  delCount=%d   dc=%d\n", maxId, count, delCount, dc);
 
     sqlSafef(query,sizeof(query),
 	"select id, firstUse, lastUse, useCount from %s"
-	" where id > %d order by id limit %d"
+	" where id > %u order by id limit %d"
 	, table
 	, maxId
         , chunkSize
 	);
     sr = sqlGetResult(conn, query);
     rc = 0;
     dc = 0;
     dyStringClear(dy);
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	++count;
 	++rc;
 
         maxId = sqlUnsigned(row[0]);
         useCount = sqlSigned(row[3]);
 
         int daysAgoFirstUse = toDaysAgo(row[1], maxId); 
         int daysAgoLastUse  = toDaysAgo(row[2], maxId); 
 
-	verbose(3, "id: %d, firstUse: [%s] [%d days ago], lastUse: [%s] [%d days ago], useCount: %d\n"
+	verbose(3, "id: %u, firstUse: [%s] [%d days ago], lastUse: [%s] [%d days ago], useCount: %d\n"
 	    , maxId
 	    , row[1], daysAgoFirstUse
 	    , row[2], daysAgoLastUse
 	    , useCount 
 	    );
 
 	deleteThis = FALSE;
 
 	if (sameString(table, sessionDbTableName))
 	    {
 	    if (daysAgoLastUse >= 14)
 		{
 		deleteThis = TRUE;
 		++oldRecCount;
 		}
             else if ((daysAgoFirstUse >= 2) && useCount <= 1)  /* reasonable new addition */
                 {
                 deleteThis = TRUE;
                 ++delRobotCount;
                 }
 	    }
 
 	if (sameString(table, userDbTableName))
 	    {
             if ((daysAgoFirstUse >= 7) && useCount < 7)
                 {
                 deleteThis = TRUE;
                 ++delRobotCount;
                 }
             else if ((daysAgoFirstUse >= 2) && useCount <= 1)
                 {
                 deleteThis = TRUE;
                 ++delRobotCount;
                 }
             else if (daysAgoLastUse >= 365)  /* reasonable new addition */
 		{
 		deleteThis = TRUE;
 		++oldRecCount;
 		}
 	    }
 
 	if (deleteThis)
 	    {
     	    ++dc;
-	    verbose(3, "TO DELETE id: %d, "
+	    verbose(3, "TO DELETE id: %u, "
     		"firstUse: [%s] [%d days ago], lastUse: [%s] [%d days ago], useCount: %d\n"
 		, maxId
     		, row[1], daysAgoFirstUse
     		, row[2], daysAgoLastUse
     		, useCount 
     		);
-	    slAddHead(&delList, slIntNew(maxId));
+	    slAddHead(&delList, slUnsignedNew(maxId));
 	    }
 
 	}
     sqlFreeResult(&sr);
    
     if (rc < 1)
 	    break;
 
     if (dc > 0)
 	{
-	struct slInt *i;
+	struct slUnsigned *i;
 	for (i=delList;i;i=i->next)
 	    {
 	    dyStringClear(dy);
-	    sqlDyStringPrintf(dy, "delete from %s where id=%d", table, i->val);
+	    sqlDyStringPrintf(dy, "delete from %s where id=%u", table, i->val);
 	    sqlUpdate(conn,dy->string);
 	    }
 	slFreeList(&delList);
 	}
  
     delCount+=dc;
   
     if (maxId >= endId)
 	{
 	break;  // we have done enough
 	}
 
     	
     verbose(3, "sleeping %d seconds\n", chunkWait);fflush(stderr);
     sleep(chunkWait);
     verbose(3, "awake\n");fflush(stderr);
 
     }
 
     verbose(1, "old recs deleted %d, robot recs deleted %d\n", oldRecCount, delRobotCount);fflush(stderr);
 
     time_t cleanEnd = time(NULL);
     int minutes = difftime(cleanEnd, cleanSectionStart) / 60; 
     verbose(1, "%s\n", ctime(&cleanEnd));
     verbose(1, "%d minutes\n\n", minutes);
 }
 
-int binaryIdSearch(int *ids, int numIds, char *table, int daysAgo)
+int binaryIdSearch(unsigned int *ids, int numIds, char *table, int daysAgo)
 /* Find the array index in ids which holds the id that contains
  * the oldest record satisfying the daysAgo criterion. 
  * If not found, return -1 */
 {
 char query[256];
 int a = 0;
 int b = numIds - 1;
 int m = 0;
-//verbose(1, "\nDEBUG:\n");  // DEBUG REMOVE
 while (TRUE)
     {
     if (a > b)
 	return a;   // is this right?
     m = (b + a) / 2;
     //verbose(1,"bin a=%d, b=%d, m=%d\n", a, b, m);
     while (TRUE)
 	{
-	sqlSafef(query, sizeof(query), "select firstUse from %s where id=%d", table, ids[m]);
+	sqlSafef(query, sizeof(query), "select firstUse from %s where id=%u", table, ids[m]);
 	char *firstUse = sqlQuickString(conn,query);
 	if (firstUse)
 	    {
 	    int daysAgoFirstUse = toDaysAgo(firstUse, ids[m]); 
             //verbose(1, "DEBUG: %d %d %s %d\n", m, ids[m], firstUse, daysAgoFirstUse);  // DEBUG REMOVE
 	    if (daysAgoFirstUse > daysAgo)
 		{
 		a = m + 1;
 		}
 	    else 
 		{
 		b = m - 1;
 		}
 	    break;
 	    }
 	else // rare event: record not found, was it deleted?
 	    {
-	    errAbort("hgcentralTidy: unexpected error in binaryIdSearch() id %d not found in table %s", ids[m], table);
+	    errAbort("hgcentralTidy: unexpected error in binaryIdSearch() id %u not found in table %s", ids[m], table);
 	    }
 	}
     }
 }
 
 
 boolean cleanTable(char *table)
 /* clean a specific table */
 {
 
 struct sqlResult *sr;
 char **row;
 char query[256];
-int *ids;
+unsigned int *ids;
 int totalRows = 0;
 boolean squealed = FALSE;
 time_t cleanStart = time(NULL);
 
 verbose(1, "-------------------\n");
 verbose(1, "Cleaning table %s\n", table);
 verbose(1, "%s\n", ctime(&cleanStart));
 
 
 totalRows = sqlTableSize(conn, table);
 verbose(1,"totalRows=%d\n", totalRows);
 
 if (totalRows==0)
     {
     verbose(1,"table %s is empty!", table);
     return FALSE;
     }
 
 AllocArray(ids, totalRows);
 
 // This is a super-fast query because it only needs to read the index which is cached in memory.
 sqlSafef(query,sizeof(query), "select id from %s" , table);
 sr = sqlGetResult(conn, query);
 int i = 0;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     ids[i++] = sqlUnsigned(row[0]);
     if (i >= totalRows)
 	break;
     }
 sqlFreeResult(&sr);
 totalRows = i;  // in case they differed.
 
 int purgeRangeStart = -1;
 int purgeRangeEnd = -1;
 if (optionExists("purgeStart"))   // manual purge range specified
     {
     purgeStart = optionInt("purgeStart", -1);
     purgeEnd = optionInt("purgeEnd", -1);
     if (purgeStart < 1 || purgeStart > 720)
 	errAbort("Invalid purgeStart");
     if (purgeEnd < 0)
 	purgeEnd = 0;
     if (purgeStart < purgeEnd)
 	errAbort("purgeStart should be greater than purgeEnd (in days ago)");
     purgeRangeStart = binaryIdSearch(ids, totalRows, table, purgeStart);
     purgeRangeEnd   = binaryIdSearch(ids, totalRows, table, purgeEnd);
-    verbose(1, "manual purge range: purgeStart %d purgeEnd %d rangeStart %d rangeEnd %d rangeSize=%d ids[rs]=%d\n", 
+    verbose(1, "manual purge range: purgeStart %d purgeEnd %d rangeStart %d rangeEnd %d rangeSize=%d ids[rs]=%u\n", 
                                     purgeStart,   purgeEnd, purgeRangeStart, purgeRangeEnd, purgeRangeEnd-purgeRangeStart, ids[purgeRangeStart]);
     if (!optionExists("dryRun"))
 	cleanTableSection(table, ids[purgeRangeStart], ids[purgeRangeEnd]);
     }
 else  // figure out purge-ranges automatically
     {
 
     int firstUseAge = 0;
     if (sameString(table, sessionDbTableName))
 	firstUseAge = 14;
     if (sameString(table, userDbTableName))
 	firstUseAge = 365;
 
     sqlSafef(query,sizeof(query), "select dayofweek(now())");
     int day = sqlQuickNum(conn, query);
 
     // These old records take a long time to go through, 5k sessionDb to 55k userDb old recs to look at,
     //  and typically produce only a few hundred deletions.
     //  they are growing slowly and expire rarely, so we don't need to scan them
     //  frequently and aggressively.  So ONLY scan them once per week by doing 1/7 per day.
     // Also don't need to worry much about the 
     //  borders of the split-over-7-days divisions shifting much because the set is so nearly static.  YAWN.
+
     int firstUseIndex = binaryIdSearch(ids, totalRows, table, firstUseAge);
     int oldRangeSize = (firstUseIndex - 0) / 7;
     int oldRangeStart = oldRangeSize * (day-1);
     int oldRangeEnd = oldRangeStart + oldRangeSize;
-    verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d day %d: rangeStart %d rangeEnd %d rangeSize=%d ids[oldRangeStart]=%d\n", 
+    verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d day %d: rangeStart %d rangeEnd %d rangeSize=%d ids[oldRangeStart]=%u\n", 
         firstUseAge, firstUseIndex, day, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[oldRangeStart]);
     //int oldRangeStart = 0;
     //int oldRangeEnd = firstUseIndex;
-    //verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d rangeStart %d rangeEnd %d rangeSize=%d ids[firstUseIndex]=%d\n", 
+    //verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d rangeStart %d rangeEnd %d rangeSize=%d ids[firstUseIndex]=%u\n", 
 	//firstUseAge, firstUseIndex, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[firstUseIndex]);
 
     // newly old can be expected to have some delete action
     //  these records have newly crossed the threshold into being old enough to have possibly expired.
     int newOldRangeStart = firstUseIndex;
     int newOldRangeEnd = binaryIdSearch(ids, totalRows, table, firstUseAge - 1);
-    verbose(1, "newOld cleaner: firstUseAge=%d rangeStart %d rangeEnd %d rangeSize=%d ids[newOldRangeStart]=%d\n", 
+    verbose(1, "newOld cleaner: firstUseAge=%d rangeStart %d rangeEnd %d rangeSize=%d ids[newOldRangeStart]=%u\n", 
 	firstUseAge, newOldRangeStart, newOldRangeEnd, newOldRangeEnd-newOldRangeStart, ids[newOldRangeStart]);
    
 
     // this is the main delete action of cleaning out new robots (20k to 50k or more)
     int robo1RangeStart = binaryIdSearch(ids, totalRows, table, 2);
     int robo1RangeEnd   = binaryIdSearch(ids, totalRows, table, 1);
-    verbose(1, "robot cleaner1: twoDayIndex = %d oneDayIndex %d rangeSize=%d ids[rs]=%d\n", 
+    verbose(1, "robot cleaner1: twoDayIndex = %d oneDayIndex %d rangeSize=%d ids[rs]=%u\n", 
       robo1RangeStart, robo1RangeEnd, robo1RangeEnd-robo1RangeStart, ids[robo1RangeStart]);
 
     int robo2RangeStart = -1;
     int robo2RangeEnd = -1;
     if (sameString(table, userDbTableName))
 	{  // secondary robot cleaning only for userDb., produces a somewhat lesser, perhaps 3 to 5k deletions
 	robo2RangeStart = binaryIdSearch(ids, totalRows, table, 7);
 	robo2RangeEnd   = binaryIdSearch(ids, totalRows, table, 6);
-	verbose(1, "robot cleaner2: sevenDayIndex = %d sixDayIndex %d rangeSize=%d ids[rs]=%d\n", 
+	verbose(1, "robot cleaner2: sevenDayIndex = %d sixDayIndex %d rangeSize=%d ids[rs]=%u\n", 
 	  robo2RangeStart, robo2RangeEnd, robo2RangeEnd-robo2RangeStart, ids[robo2RangeStart]);
 	}
 
     /* cannot clean until we have all the ranges determined since deleting messes up binSearch */
     if (!optionExists("dryRun"))
 	{
 	verbose(1, "old cleaner:\n");
 	cleanTableSection(table, ids[oldRangeStart], ids[oldRangeEnd]);
 	}
 
     if (!optionExists("dryRun"))
 	{
 	verbose(1, "newOld cleaner:\n");
 	cleanTableSection(table, ids[newOldRangeStart], ids[newOldRangeEnd]);
 	}
 
     if (!optionExists("dryRun"))
 	{
 	verbose(1, "robot cleaner1:\n");
 	cleanTableSection(table, ids[robo1RangeStart], ids[robo1RangeEnd]);
 	}
 
     if (sameString(table, userDbTableName))
 	{
 	if (!optionExists("dryRun"))
 	    {
 	    verbose(1, "robot cleaner2:\n");
 	    cleanTableSection(table, ids[robo2RangeStart], ids[robo2RangeEnd]);
 	    }
 	}
 
     }
 
 /*
 int found = binaryIdSearch(ids, totalRows, table, 1);
 if ((found >= 0) && (found < totalRows))
-    verbose(1, "1 days ago found = %d, id == ids[found] = %d \n", found, ids[found]);
+    verbose(1, "1 days ago found = %d, id == ids[found] = %u \n", found, ids[found]);
 
 found = binaryIdSearch(ids, totalRows, table, 2);
 if ((found >= 0) && (found < totalRows))
-    verbose(1, "2 days ago found = %d, id == ids[found] = %d \n", found, ids[found]);
+    verbose(1, "2 days ago found = %d, id == ids[found] = %u \n", found, ids[found]);
 
 found = binaryIdSearch(ids, totalRows, table, 30);
 if ((found >= 0) && (found < totalRows))
-    verbose(1, "30 days ago found = %d, id == ids[found] = %d \n", found, ids[found]);
+    verbose(1, "30 days ago found = %d, id == ids[found] = %u \n", found, ids[found]);
 
 */
 
 
 	    /*
 	    if (daysAgoFirstUse < 14)
 		{
 		hitEnd = TRUE;
                 break;
 		}
 	    */
 
             /*
 	    if (daysAgoFirstUse < 365)
 		{
 		hitEnd = TRUE;
                 break;
 		}
             */
 
 // may need to pass back this data from the cleanTableSection call TODO
 //verbose(1, "%s: #rows count=%d  delCount=%d\n\n", table, count, delCount);
 
 time_t cleanEnd = time(NULL);
 int minutes = difftime(cleanEnd, cleanStart) / 60; 
 verbose(1, "%s\n", ctime(&cleanEnd));
 verbose(1, "%d minutes total\n\n", minutes);
 
 squealed = checkMaxTableSizeExceeded(table);
 
 return squealed;
 
 }
 
 boolean hgcentralTidy(char *config)
 /* hgcentralTidy - Clean out old carts. */
 {
 
 boolean squealed = FALSE;
 
 /* get connection info */
 database = getCfgOption(config, "db"      );
 host     = getCfgOption(config, "host"    );
 user     = getCfgOption(config, "user"    );
 password = getCfgOption(config, "password");
 
 conn = sqlConnectRemote(host, user, password, database);
 
 verbose(1, "Cleaning database %s.%s\n", host, database);
 verbose(1, "chunkWait=%d chunkSize=%d\n", chunkWait, chunkSize);
 
 //sessionDbTableName = "sessionDbGalt";
 
 //userDbTableName = "userDbGalt";
 
 if (!purgeTable || sameString(purgeTable,sessionDbTableName))
     {
     if (cleanTable(sessionDbTableName))
       squealed = TRUE;
     }
 
 if (!purgeTable || sameString(purgeTable,userDbTableName))
     {
     if (cleanTable(userDbTableName))
       squealed = TRUE;
     }
 
 sqlDisconnect(&conn);
 
 return squealed;
 
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 chunkSize = optionInt("chunkSize", chunkSize);
 chunkWait = optionInt("chunkWait", chunkWait);
 squealSize = optionInt("squealSize", squealSize);
 if (optionExists("purgeTable"))
     {
     purgeTable = optionVal("purgeTable", NULL);
     if (!sameString(purgeTable,"sessionDb") && !sameString(purgeTable,"userDb"))
 	errAbort("Invalid value for purgeTable option, must be userDb or sessionDb or leave option off for both.");
     }
 if (argc != 2)
     usage();
 return hgcentralTidy(argv[1]);
 }