733ba7b804e84d7b60ccad1f137398ecd52db983 chmalee Tue Apr 23 18:15:09 2024 -0700 Add a general highlight trackDb variable(s), working like trackDb filters, except put a color behind the item, refs #24507 diff --git src/hg/utils/tdbQuery/tdbQuery.c src/hg/utils/tdbQuery/tdbQuery.c index f878cd5..7187b6a 100644 --- src/hg/utils/tdbQuery/tdbQuery.c +++ src/hg/utils/tdbQuery/tdbQuery.c @@ -1,1322 +1,1338 @@ /* tdbQuery - Query the trackDb system using SQL syntax.. */ /* Copyright (C) 2013 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "localmem.h" #include "dystring.h" #include "obscure.h" #include "portable.h" #include "errAbort.h" #include "trackDb.h" #include "tdbRecord.h" #include "ra.h" #include "hdb.h" /* Just for strict option. */ #include "rql.h" static char *clRoot = "~/kent/src/hg/makeDb/trackDb"; /* Root dir of trackDb system. */ static boolean clCheck = FALSE; /* If set perform lots of checks on input. */ static boolean clStrict = FALSE; /* If set only return tracks with actual tables. */ #define RELEASE_ALPHA (1 << 0) #define RELEASE_BETA (1 << 1) #define RELEASE_PUBLIC (1 << 2) static char *release = "alpha"; static unsigned releaseBit = RELEASE_ALPHA; static boolean clNoBlank = FALSE; /* If set suppress blank lines in output. */ static boolean clOneLine = FALSE; /* If set then print record on single pipe-separated line. */ static char *clRewrite = NULL; /* Rewrite to given directory. */ static boolean clNoCompSub = FALSE; /* If set don't do subtrack inheritence of fields. */ static int shortLabelLength; /* if non-zero check that short labels are no longer than this */ static int longLabelLength; /* if non-zero check that long labels are no longer than this */ void usage() /* Explain usage and exit. 
*/ { errAbort( "tdbQuery - Query the trackDb system using SQL syntax.\n" "Usage:\n" " tdbQuery sqlStatement\n" "Where the SQL statement is enclosed in quotations to avoid the shell interpreting it.\n" "Only a very restricted subset of a single SQL statement (select) is supported. Examples:\n" " tdbQuery \"select count(*) from hg18\"\n" "counts all of the tracks in hg18 and prints the results to stdout\n" " tdbQuery \"select count(*) from *\"\n" "counts all tracks in all databases.\n" " tdbQuery \"select track,shortLabel from hg18 where type like 'bigWig%%'\"\n" "prints to stdout a a two field .ra file containing just the track and shortLabels of bigWig \n" "type tracks in the hg18 version of trackDb.\n" " tdbQuery \"select * from hg18 where track='knownGene' or track='ensGene'\"\n" "prints the hg18 knownGene and ensGene track's information to stdout.\n" " tdbQuery \"select *Label from mm9\"\n" "prints all fields that end in 'Label' from the mm9 trackDb.\n" "OPTIONS:\n" " -root=/path/to/trackDb/root/dir\n" "Sets the root directory of the trackDb.ra directory hierarchy to be given path. By default\n" "this is ~/kent/src/hg/makeDb/trackDb.\n" " -check\n" "Check that trackDb is internally consistent. Prints diagnostic output to stderr and aborts if \n" "there's problems.\n" " -strict\n" "Mimic -strict option on hgTrackDb. Suppresses tracks where corresponding table does not exist.\n" " -release=alpha|beta|public\n" "Include trackDb entries with this release tag only. 
Default is alpha.\n" " -noBlank\n" "Don't print out blank lines separating records\n" " -oneLine\n" "Print single ('|') pipe-separated line per record\n" " -noCompSub\n" "Subtracks don't inherit fields from parents\n" " -shortLabelLength=N\n" "Complain if shortLabels are over N characters\n" " -longLabelLength=N\n" "Complain if longLabels are over N characters\n" ); } static struct optionSpec options[] = { {"root", OPTION_STRING}, {"check", OPTION_BOOLEAN}, {"strict", OPTION_BOOLEAN}, {"release", OPTION_STRING}, {"noBlank", OPTION_BOOLEAN}, {"oneLine", OPTION_BOOLEAN}, {"rewrite", OPTION_STRING}, {"noCompSub", OPTION_BOOLEAN}, {"shortLabelLength", OPTION_INT}, {"longLabelLength", OPTION_INT}, {NULL, 0}, }; #define glKeyField "track" /* The field that has the record ID */ struct hash *glTagTypes = NULL; /* Hash of tagTypes file keyed by tag. */ char glTagTypeFile[PATH_LEN]; /* File name of tagTypes.tab including dir. */ void recordLocationReport(struct tdbRecord *rec, FILE *out) /* Write out where record ends. */ { struct tdbFilePos *pos; for (pos = rec->posList; pos != NULL; pos = pos->next) fprintf(out, "in track %s stanza starting line %d of %s\n", rec->key, pos->startLineIx, pos->fileName); } void recordWarn(struct tdbRecord *rec, char *format, ...) /* Issue a warning message. */ { va_list args; va_start(args, format); vaWarn(format, args); va_end(args); recordLocationReport(rec, stderr); } void recordAbort(struct tdbRecord *rec, char *format, ...) /* Issue a warning message. */ { va_list args; va_start(args, format); vaWarn(format, args); va_end(args); recordLocationReport(rec, stderr); noWarnAbort(); } struct hash *readTagTypeHash(char *fileName) /* Set up tagTypeHash and other stuff needed for checking. 
*/ { struct hash *hash = hashNew(0); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; while (lineFileNextReal(lf, &line)) { struct slName *typeList = NULL; char *tag = nextWord(&line); char *word; while ((word = nextWord(&line)) != NULL) slNameAddHead(&typeList, word); hashAdd(hash, tag, typeList); } lineFileClose(&lf); return hash; } static boolean matchAnyWild(struct slName *wildList, char *s) /* Return TRUE if s matches any wildcard in list. */ { struct slName *wild; for (wild = wildList; wild != NULL; wild = wild->next) { if (wildMatch(wild->name, s)) return TRUE; } return FALSE; } struct dbPath /* A database directory and path. */ { struct dbPath *next; char *db; char *dir; }; static struct dbPath *getDbPathList(char *root) /* Get list of all "database" directories with any trackDb.ra files two under us. */ { struct dbPath *pathList = NULL, *path; struct fileInfo *org, *orgList = listDirX(root, "*", TRUE); /* If in strict mode avoid looking up databases that aren't in mysql. 
*/ struct hash *dbStrictHash = NULL; if (clStrict) { struct sqlConnection *conn = sqlConnect(NULL); struct slName *db, *dbList = sqlGetAllDatabase(conn); dbStrictHash = hashNew(0); for (db = dbList; db != NULL; db = db->next) hashAdd(dbStrictHash, db->name, NULL); sqlDisconnect(&conn); slFreeList(&dbList); } for (org = orgList; org != NULL; org = org->next) { if (org->isDir) { struct fileInfo *db, *dbList = listDirX(org->name, "*", TRUE); for (db = dbList; db != NULL; db = db->next) { if (db->isDir) { char trackDbPath[PATH_LEN]; safef(trackDbPath, sizeof(trackDbPath), "%s/trackDb.ra", db->name); char descriptionHtmlPath[PATH_LEN]; safef(descriptionHtmlPath, sizeof(descriptionHtmlPath), "%s/description.html", db->name); if (fileExists(trackDbPath) || fileExists(descriptionHtmlPath)) { char *s = strrchr(db->name, '/'); assert(s != NULL); char *fileOnly = s+1; if (dbStrictHash == NULL || hashLookup(dbStrictHash, fileOnly) != NULL) { AllocVar(path); path->db = cloneString(fileOnly); path->dir = cloneString(db->name); slAddHead(&pathList, path); } } } } slFreeList(&dbList); } } slFreeList(&orgList); slReverse(&pathList); return pathList; } static struct slName *dbPathToFiles(struct dbPath *p) /* Convert dbPath to a list of files. 
*/ { struct slName *pathList = NULL; char *dbDir = p->dir; char *relPaths = "../../trackDb.ra ../trackDb.ra trackDb.ra"; char *buf = cloneString(relPaths); char *line = buf, *word; while ((word = nextWord(&line)) != NULL) { char relDir[PATH_LEN], relFile[PATH_LEN], relSuffix[PATH_LEN]; splitPath(word, relDir, relFile, relSuffix); char dir[PATH_LEN]; safef(dir, sizeof(dir), "%s/%s", dbDir, relDir); char *path = simplifyPathToDir(dir); char pattern[PATH_LEN]; safef(pattern, sizeof(pattern), "%s%s", relFile, relSuffix); struct fileInfo *fi, *fiList = listDirX(path, pattern, TRUE); for (fi = fiList; fi != NULL; fi = fi->next) slNameAddHead(&pathList, fi->name); freeMem(path); slFreeList(&fiList); } freeMem(buf); slReverse(&pathList); return pathList; } struct dbPath *dbPathFind(struct dbPath *list, char *db) /* Return element on list corresponding to db, or NULL if it doesn't exist. */ { struct dbPath *p; for (p=list; p != NULL; p = p->next) if (sameString(p->db, db)) break; return p; } unsigned buildReleaseBits(struct tdbRecord *record) /* unpack the comma separated list of possible release tags */ { char *rel = tdbRecordFieldVal(record, "release"); if (rel == NULL) return RELEASE_ALPHA | RELEASE_BETA | RELEASE_PUBLIC; char relCpy[strlen(rel) + 1]; safecpy(relCpy, sizeof relCpy, rel); rel = relCpy; unsigned bits = 0; while(rel) { char *end = strchr(rel, ','); if (end) *end++ = 0; rel = trimSpaces(rel); if (sameString(rel, "alpha")) bits |= RELEASE_ALPHA; else if (sameString(rel, "beta")) bits |= RELEASE_BETA; else if (sameString(rel, "public")) bits |= RELEASE_PUBLIC; else errAbort("Tracks must have a release combination of alpha, beta, and public on line %d of %s" "(not '%s')", tdbRecordLineIx(record), tdbRecordFileName(record), rel); rel = end; } return bits; } boolean recordMatchesRelease( struct tdbRecord *record, unsigned currentReleaseBit) /* Return TRUE if record is compatible with release. 
*/ { unsigned bits = buildReleaseBits(record); if (bits & currentReleaseBit) return TRUE; return FALSE; } boolean compatibleReleases(struct tdbRecord *a, struct tdbRecord *b, unsigned currentReleaseBit) /* Return TRUE if either a or b release is null, or if a and b are the same regarding the * current release. */ { return (tdbRecordFieldVal(a, "release") == NULL || tdbRecordFieldVal(b, "release") == NULL || recordMatchesRelease(a, currentReleaseBit) == recordMatchesRelease(b, currentReleaseBit)); } boolean sameKeyCompatibleRelease(struct tdbRecord *a, struct tdbRecord *b, unsigned currentReleaseBit) /* Return TRUE if a and b have the same key and compatible releases. */ { return sameString(a->key, b->key) && compatibleReleases(a, b, currentReleaseBit); } struct tdbRecord *filterOnRelease( struct tdbRecord *list, unsigned currentReleaseBit) /* Return release-filtered version of list. */ { struct tdbRecord *newList = NULL; struct tdbRecord *record, *next; for (record = list; record != NULL; record = next) { next = record->next; if (recordMatchesRelease(record, currentReleaseBit)) { slAddHead(&newList, record); } } slReverse(&newList); return newList; } static void addReleaseTag(struct tdbRecord *record, struct lineFile *lf, char *releaseTag) /* make sure there is no existing release tag, and add one if not */ { struct tdbField *field, *last = NULL; for (field = record->fieldList; field != NULL; last = field, field = field->next) { if (sameString(field->name, "release")) errAbort("Release tag in stanza with include release override line %d of %s", tdbRecordLineIx(record), lf->fileName); } assert(last != NULL); struct tdbField *releaseField; AllocVar(releaseField); last->next = releaseField; releaseField->name = cloneString("release"); releaseField->val = cloneString(releaseTag); } static void checkDupeFields(struct tdbRecord *record, struct lineFile *lf) /* Make sure that each field in record is unique. 
*/ { struct hash *uniqHash = hashNew(0); struct tdbField *field; for (field = record->fieldList; field != NULL; field = field->next) { if (hashLookup(uniqHash, field->name)) errAbort("Duplicate tag %s in record starting line %d of %s", field->name, tdbRecordLineIx(record), lf->fileName); hashAdd(uniqHash, field->name, NULL); } hashFree(&uniqHash); } static void checkDupeKeys(struct tdbRecord *recordList, boolean checkRelease, unsigned currentReleaseBit) /* Make sure that there are no duplicate records (with keys) */ { struct tdbRecord *record; struct hash *uniqHash = hashNew(0); for (record = recordList; record != NULL; record = record->next) { char *key = record->key; if (key != NULL) { struct hashEl *hel; for (hel = hashLookup(uniqHash, key); hel != NULL; hel = hashLookupNext(hel)) { struct tdbRecord *oldRecord = hel->val; struct tdbFilePos *oldPos = oldRecord->posList; struct tdbFilePos *newPos = record->posList; boolean doAbort = TRUE; if (checkRelease) doAbort = compatibleReleases(oldRecord, record, currentReleaseBit); if (doAbort) { char *oldRelease = tdbRecordFieldVal(oldRecord, "release"); char *newRelease = tdbRecordFieldVal(record, "release"); if (newRelease == NULL && oldRelease != NULL) { errAbort("Have release tag for track %s at line %d of %s, but not " "at line %d of %s", key, oldPos->startLineIx, oldPos->fileName, newPos->startLineIx, newPos->fileName); } else if (oldRelease == NULL && newRelease != NULL) { errAbort("Have release tag for track %s at line %d of %s, but not " "at line %d of %s", key, newPos->startLineIx, newPos->fileName, oldPos->startLineIx, oldPos->fileName); } else { if (sameString(oldPos->fileName, newPos->fileName)) { errAbort("Duplicate tracks %s starting lines %d and %d of %s", key, oldPos->startLineIx, newPos->startLineIx, oldPos->fileName); } else errAbort("Duplicate tracks %s starting lines %d of %s and %d of %s", key, oldPos->startLineIx, oldPos->fileName, newPos->startLineIx, newPos->fileName); } } } hashAdd(uniqHash, key, 
record); } } hashFree(&uniqHash); } static void recurseThroughIncludes(char *fileName, struct lm *lm, struct hash *circularHash, struct tdbRecord **pRecordList, char *releaseTag) /* Recurse through include files. */ { struct tdbRecord *record; struct lineFile *lf = lineFileOpen(fileName, TRUE); while ((record = tdbRecordReadOne(lf, glKeyField, lm)) != NULL) { struct tdbField *firstField = record->fieldList; if (sameString(firstField->name, "include")) { struct tdbField *field; for (field = firstField; field != NULL; field = field->next) { if (!sameString(field->name, "include")) { errAbort("Non-include tag %s in an include stanza starting line %d of %s", field->name, tdbRecordLineIx(record), lf->fileName); } char dir[PATH_LEN]; splitPath(lf->fileName, dir, NULL, NULL); char includeName[PATH_LEN]; char *words[5]; int count = chopLine(field->val, words); if (count > 2) errAbort("Too many words on include line at line %d of %s", tdbRecordLineIx(record), lf->fileName); char *relPath = words[0]; char *subRelease = NULL; if (count == 2) { subRelease = cloneString(words[1]); if (!trackDbCheckValidRelease(subRelease)) errAbort("Include with bad release tag %s at line %d of %s", subRelease, tdbRecordLineIx(record), lf->fileName); } else if (releaseTag != NULL) { subRelease = releaseTag; } if (subRelease && releaseTag && !sameString(subRelease, releaseTag)) errAbort("Include with release %s included from include with release %s at line " "%d of %s", subRelease, releaseTag, tdbRecordLineIx(record), lf->fileName); safef(includeName, sizeof(includeName), "%s%s", dir, relPath); if (hashLookup(circularHash, includeName)) { errAbort("Including file %s in an infinite loop line %d of %s", includeName, tdbRecordLineIx(record), lf->fileName); } recurseThroughIncludes(includeName, lm, circularHash, pRecordList, subRelease); } } else { checkDupeFields(record, lf); if (releaseTag) addReleaseTag(record, lf, releaseTag); if (record->key != NULL) { slAddHead(pRecordList, record); } } } 
struct tdbRecord *readStartingFromFile(char *fileName, struct lm *lm)
/* Read in records from fileName and from any files included from it, and
 * return them as a list in the order they were read.  lm is handed on to the
 * record reader. */
{
struct tdbRecord *recordList = NULL;
struct hash *circularHash = hashNew(0);

/* The starting file must be registered BEFORE recursing.  recurseThroughIncludes
 * consults this hash to detect include loops, so adding the name afterwards
 * would let a chain of includes leading back to fileName recurse without end. */
hashAdd(circularHash, fileName, NULL);
recurseThroughIncludes(fileName, lm, circularHash, &recordList, NULL);
hashFree(&circularHash);

/* Records are pushed onto the head of the list as they are read, so flip
 * the list to get back to file order. */
slReverse(&recordList);
return recordList;
}
This is a bit complicated by wanting to * match parents and children from the same release if possible. Our * strategy is to just ignore records from the wrong release. */ { if (rec->override) // don't do these return NULL; if (clNoCompSub) return NULL; struct tdbField *parentField = tdbRecordField(rec, parentFieldName); if (parentField == NULL) return NULL; char *parentLine = parentField->val; int len = strlen(parentLine); char buf[len+1]; strcpy(buf, parentLine); char *parentName = firstWordInLine(buf); struct hashEl *hel; struct tdbRecord *closestParent = NULL; int closestDistance = BIGNUM; for (hel = hashLookup(hash, parentName); hel != NULL; hel = hashLookupNext(hel)) { struct tdbRecord *parent = hel->val; if (compatibleReleases(rec, parent, currentReleaseBit)) { int distance = parentChildFileDistance(parent, rec); if (distance < closestDistance) { closestParent = parent; closestDistance = distance; } } } if (closestParent != NULL) return closestParent; recordWarn(rec, "parent %s of %s release %s doesn't exist", parentName, rec->key, naForNull(tdbRecordFieldVal(rec, "release"))); return NULL; } static void linkUpParents(struct tdbRecord *list, char *parentField, unsigned currentReleaseBit) /* Link up records according to parent/child relationships. */ { /* Zero out children, parent, and older sibling fields, since going to recalculate * them and need lists to start out empty. */ struct tdbRecord *rec; for (rec = list; rec != NULL; rec = rec->next) rec->parent = rec->olderSibling = rec->children = NULL; /* Build up hash of records indexed by key field. */ struct hash *hash = hashNew(0); for (rec = list; rec != NULL; rec = rec->next) { if (rec->key != NULL) hashAdd(hash, rec->key, rec); } /* Scan through linking up parents. 
*/ for (rec = list; rec != NULL; rec = rec->next) { struct tdbRecord *parent = findParent(rec, parentField, hash, currentReleaseBit); if (parent != NULL) { rec->parent = parent; rec->olderSibling = parent->children; parent->children = rec; } } hashFree(&hash); } struct tdbRecord *tdbsForDbPath(struct dbPath *p, struct lm *lm, char *parentField, unsigned currentReleaseBit) /* Assemble recordList for given database. This looks at the root/organism/assembly * levels. It returns a list of records. */ { struct hash *recordHash = hashNew(0); struct slName *fileLevelList = dbPathToFiles(p), *fileLevel; struct tdbRecord *recordList = NULL; for (fileLevel = fileLevelList; fileLevel != NULL; fileLevel = fileLevel->next) { char *fileName = fileLevel->name; struct tdbRecord *fileRecords = readStartingFromFile(fileName, lm); verbose(2, "Read %d records starting from %s\n", slCount(fileRecords), fileName); fileRecords = filterOnRelease(fileRecords, currentReleaseBit); verbose(2, "After filterOnRelease %d records\n", slCount(fileRecords)); linkUpParents(fileRecords, parentField, currentReleaseBit); checkDupeKeys(fileRecords, TRUE, currentReleaseBit); struct tdbRecord *record, *nextRecord; for (record = fileRecords; record != NULL; record = nextRecord) { nextRecord = record->next; char *key = record->key; struct tdbRecord *oldRecord = hashFindVal(recordHash, key); if (oldRecord != NULL && sameKeyCompatibleRelease(record, oldRecord, currentReleaseBit)) { if (!record->override) { oldRecord->fieldList = record->fieldList; oldRecord->posList = record->posList; } else mergeRecords(oldRecord, record, glKeyField, lm); } else { hashAdd(recordHash, record->key, record); slAddHead(&recordList, record); } } } hashFree(&recordHash); slReverse(&recordList); return recordList; } static void mergeParentRecord(struct tdbRecord *record, struct tdbRecord *parent, struct lm *lm) /* Merge in parent record. This only updates fields that are in parent but not record. 
*/ { struct tdbField *parentField; for (parentField= parent->fieldList; parentField!= NULL; parentField= parentField->next) { struct tdbField *oldField = tdbRecordField(record, parentField->name); if (oldField == NULL) { struct tdbField *newField; lmAllocVar(lm, newField); newField->name = parentField->name; newField->val = parentField->val; slAddTail(&record->fieldList, newField); } } } static void inheritFromParents(struct tdbRecord *list, char *parentField, char *noInheritField, unsigned currentReleaseBit, struct lm *lm) /* Go through list. If an element has a parent field, then fill in non-existent fields from * parent. */ { linkUpParents(list, parentField, currentReleaseBit); /* Scan through doing inheritance. */ struct tdbRecord *rec; for (rec = list; rec != NULL; rec = rec->next) { struct tdbRecord *parent; for (parent = rec->parent; parent != NULL; parent = parent->parent) { if (!clNoCompSub) mergeParentRecord(rec, parent, lm); } } } static char *lookupField(void *record, char *key) /* Lookup a field in a tdbRecord. */ { struct tdbRecord *tdb = record; struct tdbField *field = tdbRecordField(tdb, key); if (field == NULL) return NULL; else return field->val; } static boolean rqlStatementMatch(struct rqlStatement *rql, struct tdbRecord *tdb, struct lm *lm) /* Return TRUE if where clause and tableList in statement evaluates true for tdb. */ { struct rqlParse *whereClause = rql->whereClause; if (whereClause == NULL) return TRUE; else { struct rqlEval res = rqlEvalOnRecord(whereClause, tdb, lookupField, lm); res = rqlEvalCoerceToBoolean(res); return res.val.b; } } static void rqlStatementOutput(struct rqlStatement *rql, struct tdbRecord *tdb, char *addFileField, FILE *out) /* Output fields from tdb to file. If addFileField is non-null add a new * field with this name at end of output. 
*/ { struct slName *fieldList = rql->fieldList, *field; boolean emptyOutput=TRUE; for (field = fieldList; field != NULL; field = field->next) { struct tdbField *r; boolean doWild = anyWild(field->name); for (r = tdb->fieldList; r != NULL; r = r->next) { boolean match; if (doWild) match = wildMatch(field->name, r->name); else match = (strcmp(field->name, r->name) == 0); if (match) { fprintf(out, "%s %s%c", r->name, r->val,(clOneLine?'|':'\n' )); emptyOutput=FALSE; } } } if (!emptyOutput && (!clNoBlank || clOneLine)) fprintf(out, "\n"); } static boolean tableExistsInSelfOrOffspring(char *db, struct tdbRecord *record, int level, struct slRef *parent) /* Return TRUE if table corresponding to track exists in database db. If a parent * track look for tables in kids too. */ { if ( hTableOrSplitExists(db, record->key)) return TRUE; struct tdbRecord *child; if (level > 5) { struct slRef *ancestor; struct dyString *err = dyStringNew(0); dyStringPrintf(err, "Heirarchy too deep from %s", record->key); for (ancestor=parent; ancestor != NULL; ancestor = ancestor->next) { struct tdbRecord *a = ancestor->val; dyStringPrintf(err, " to %s", a->key); } recordAbort(record, "%s", err->string); } struct slRef me; me.next = parent; me.val = record; for (child = record->children; child != NULL; child = child->olderSibling) { if (tableExistsInSelfOrOffspring(db, child, level+1, &me)) return TRUE; } return FALSE; } static int countAncestors(struct tdbRecord *r) /* Return 0 if has no parent, 1 if has a parent, 2 if it has a grandparent, etc. */ { int count = 0; struct tdbRecord *p; for (p = r->parent; p != NULL; p = p->parent) count += 1; return count; } static struct tdbRecord *closestTdbAboveLevel(struct tdbRecord *tdbList, struct tdbFilePos *childPos, int parentDepth) /* Find parent at given depth that comes closest to (but before) childPos. 
*/ { struct tdbRecord *parent, *closestParent = NULL; int closestDistance = BIGNUM; for (parent = tdbList; parent != NULL; parent = parent->next) { if (countAncestors(parent) <= parentDepth) { struct tdbFilePos *pos; for (pos = parent->posList; pos != NULL; pos = pos->next) { if (sameString(pos->fileName, childPos->fileName)) { int distance = childPos->startLineIx - pos->startLineIx; if (distance > 0) { if (distance < closestDistance) { closestDistance = distance; closestParent = parent; } } } } } } return closestParent; } static void checkChildUnderNearestParent(struct tdbRecord *recordList, struct tdbRecord *child) /* Make sure that parent record occurs before child, and that indeed it is the * closest parent before the child. */ { struct tdbRecord *parent = child->parent; int parentDepth = countAncestors(parent); /* We do the check for each file the child is in */ struct tdbFilePos *childFp, *parentFp; for (childFp = child->posList; childFp != NULL; childFp = childFp->next) { /* Find parentFp that is in this file if any. */ for (parentFp = parent->posList; parentFp != NULL; parentFp = parentFp->next) { if (sameString(parentFp->fileName, childFp->fileName)) { if (parentFp->startLineIx > childFp->startLineIx) errAbort("Child before parent in %s\n" "Child (%s) at line %d, parent (%s) at line %d", childFp->fileName, child->key, childFp->startLineIx, parent->key, parentFp->startLineIx); struct tdbRecord *closestParent = closestTdbAboveLevel(recordList, childFp, parentDepth); assert(closestParent != NULL); if (closestParent != parent) errAbort("%s comes between parent (%s) and child (%s) in %s\n" "Parent at line %d, child at line %d.", closestParent->key, parent->key, child->key, childFp->fileName, parentFp->startLineIx, childFp->startLineIx); } } } } static boolean isComplex(char *name) /* Check to see if this is one of the filter variables that have arbitrary initial strings. 
*/ { if (startsWith("yAxisLabel.", name)) return TRUE; if (startsWith("filter.", name)) return TRUE; if (startsWith("filterValues.", name)) return TRUE; if (startsWith("filterValuesDefault.", name)) return TRUE; if (startsWith("filterType.", name)) return TRUE; if (startsWith("filterLimits.", name)) return TRUE; if (startsWith("filterLabel.", name)) return TRUE; if (startsWith("filterByRange.", name)) return TRUE; if (startsWith("filterText.", name)) return TRUE; if (endsWith(name, "Filter")) return TRUE; if (endsWith(name, "FilterValues")) return TRUE; if (endsWith(name, "FilterType")) return TRUE; if (endsWith(name, "FilterLimits")) return TRUE; if (endsWith(name, "FilterText")) return TRUE; +if (startsWith("highlight.", name)) + return TRUE; +if (startsWith("highlightValues.", name)) + return TRUE; +if (startsWith("highlightValuesDefault.", name)) + return TRUE; +if (startsWith("highlightType.", name)) + return TRUE; +if (startsWith("highlightLimits.", name)) + return TRUE; +if (startsWith("highlightLabel.", name)) + return TRUE; +if (startsWith("highlightByRange.", name)) + return TRUE; +if (startsWith("highlightText.", name)) + return TRUE; return FALSE; } static void doRecordChecks(struct tdbRecord *recordList, struct lm *lm) /* Do additional checks on records. */ { /* Check fields against tagTypes.tab. */ struct tdbRecord *record; for (record = recordList; record != NULL; record = record->next) { struct tdbField *typeField = tdbRecordField(record, "type"); char *fullType = (typeField != NULL ? typeField->val : record->key); char *type = lmCloneFirstWord(lm, fullType); struct tdbField *field; for (field = record->fieldList; field != NULL; field = field->next) { if (isComplex(field->name)) continue; struct slName *typeList = rqlHashFindValEvenInWilds(glTagTypes, field->name); if (typeList == NULL) { recordAbort(record, "Tag '%s' not found in %s.\nIf it's not a typo please add %s to " "that file. 
The tag is", field->name, glTagTypeFile, field->name); } if (!matchAnyWild(typeList, type)) { recordAbort(record, "Tag '%s' not allowed for tracks of type '%s'. " "Please add it to supported types\n" "in %s if this is not a mistake. The tag is", field->name, type, glTagTypeFile); } } } /* Additional child/parent checks. */ for (record = recordList; record != NULL; record = record->next) { if (record->parent != NULL) checkChildUnderNearestParent(recordList, record); } } static void checkLabelLength(struct tdbRecord *record, char *name, int maxLength) /* check to make sure labels conform to max length settings */ { if (maxLength != 0) { // only check leaves if (record->children != NULL) return; struct tdbField *labelField = tdbRecordField(record, name); if (labelField == NULL) recordAbort(record, "missing %s", name); int length = strlen(labelField->val); if (length > maxLength) recordWarn(record, "%s is %s which is %d chars, max is %d", name, labelField->val,length,maxLength); } } void tdbQuery(char *sql) /* tdbQuery - Query the trackDb system using SQL syntax.. */ { /* Load in hash of legitimate tags. */ safef(glTagTypeFile, sizeof(glTagTypeFile), "%s/%s", clRoot, "tagTypes.tab"); glTagTypes = readTagTypeHash(glTagTypeFile); /* Parse out sql statement. */ struct lineFile *lf = lineFileOnString("query", TRUE, cloneString(sql)); struct rqlStatement *rql = rqlStatementParse(lf); lineFileClose(&lf); rqlCheckFieldsExist(rql, glTagTypes, glTagTypeFile); /* Figure out list of databases to work on. */ struct slRef *dbOrderList = NULL, *dbOrder; struct dbPath *db, *dbList = getDbPathList(clRoot); struct slName *t; for (t = rql->tableList; t != NULL; t = t->next) { for (db = dbList; db!= NULL; db = db->next) { if (wildMatch(t->name, db->db)) refAdd(&dbOrderList, db); } } verbose(2, "%d databases in from clause\n", slCount(dbOrderList)); /* Loop through each database. */ int matchCount = 0; struct dyString *fileString = dyStringNew(0); /* Buffer for file field. 
*/ for (dbOrder = dbOrderList; dbOrder != NULL; dbOrder = dbOrder->next) { struct lm *lm = lmInit(0); struct dbPath *p = dbOrder->val; char *db = p->db; struct tdbRecord *recordList = tdbsForDbPath(p, lm, "parent", releaseBit); verbose(2, "Composed %d records from %s\n", slCount(recordList), db); inheritFromParents(recordList, "parent", "noInherit", releaseBit, lm); linkUpParents(recordList, "parent", releaseBit); checkDupeKeys(recordList, FALSE, releaseBit); if (clCheck) doRecordChecks(recordList, lm); struct tdbRecord *record; boolean doSelect = sameString(rql->command, "select"); for (record = recordList; record != NULL; record = record->next) { /* Add "db" field, making sure it doesn't already exist. */ struct tdbField *dbField = tdbRecordField(record, "db"); if (dbField != NULL) recordAbort(record, "using reserved field 'db'"); dbField = tdbFieldNew("db", db, lm); slAddHead(&record->fieldList, dbField); /* Add "filePos" field, making sure it doesn't already exist. */ struct tdbField *fileField = tdbRecordField(record, "filePos"); if (fileField != NULL) recordAbort(record, "using reserved field 'filePos'"); struct tdbFilePos *fp; dyStringClear(fileString); for (fp = record->posList; fp != NULL; fp = fp->next) dyStringPrintf(fileString, " %s %d", fp->fileName, fp->startLineIx); fileField = tdbFieldNew("filePos", fileString->string, lm); slAddTail(&record->fieldList, fileField); if (rqlStatementMatch(rql, record, lm)) { if (!clStrict || tableExistsInSelfOrOffspring(p->db, record, 1, NULL)) { /* check labels if asked */ checkLabelLength(record, "shortLabel", shortLabelLength); checkLabelLength(record, "longLabel", longLabelLength); matchCount += 1; if (doSelect) { rqlStatementOutput(rql, record, "file", stdout); } if (rql->limit >= 0 && matchCount >= rql->limit) break; } } } lmCleanup(&lm); } dyStringFree(&fileString); if (sameString(rql->command, "count")) printf("%d\n", matchCount); rqlStatementFree(&rql); } struct slName *hashPair(char *raFile, char *keyField, 
char *valField, struct hash **retValHash, struct hash **retCommentHash) /* Read two fields out of a ra file. For records that have both fields put them into * a hash with the logical keys and values, which is returned. Return list of keys in order. */ { struct slName *list = NULL; struct hash *hash = hashNew(0), *commentHash = hashNew(0); struct lineFile *lf = lineFileMayOpen(raFile, TRUE); struct dyString *dy = dyStringNew(0); if (lf != NULL) { while (raSkipLeadingEmptyLines(lf, dy)) { char *theKey = NULL, *theVal = NULL; char *name, *val; while (raNextTagVal(lf, &name, &val, NULL)) { if (sameString(name, keyField)) theKey = lmCloneString(hash->lm, val); else if (sameString(name, valField)) theVal = lmCloneString(hash->lm, val); } if (theKey && theVal) { hashAdd(hash, theKey, theVal); hashAdd(commentHash, theKey, lmCloneString(hash->lm, dy->string)); slNameAddHead(&list, theKey); } } lineFileClose(&lf); } dyStringFree(&dy); *retValHash = hash; *retCommentHash = commentHash; slReverse(&list); return list; } void overrideOrWriteSelf(char *orig, char *name, char *val, char *tagName, char *keyVal, struct hash *hash, struct lineFile *lf, FILE *f) /* Write out name/val pair. 
*/ { if (keyVal == NULL) errAbort("%s tag before track tag line %d of %s", tagName, lf->lineIx, lf->fileName); char *newVal = hashFindVal(hash, keyVal); if (newVal != NULL) { int leadingSpaces = skipLeadingSpaces(orig) - orig; if (orig[0] == '#') errAbort("Rats, internal comments, can't deal with it."); mustWrite(f, orig, leadingSpaces); fprintf(f, "%s %s\n", name, newVal); hashRemove(hash, keyVal); } else fprintf(f, "%s", orig); } void rewriteWithVisAndPriUpdates(char *tIn, char *pIn, char *vIn, char *tOut) /* Write tIn to tOut applying modifications in pIn and pOut */ { struct hash *pHash, *pTextHash; struct slName *pList = hashPair(pIn, "track", "priority", &pHash, &pTextHash); struct hash *vHash, *vTextHash; struct slName *vList = hashPair(vIn, "track", "visibility", &vHash, &vTextHash); struct lineFile *lf = lineFileOpen(tIn, TRUE); FILE *f = mustOpen(tOut, "w"); struct dyString *dy = dyStringNew(0); while (raSkipLeadingEmptyLines(lf, dy)) { fprintf(f, "%s", dy->string); dyStringClear(dy); char *keyVal = NULL; char *name, *val; while (raNextTagVal(lf, &name, &val, dy)) { if (sameString(glKeyField, name)) { fprintf(f, "%s", dy->string); keyVal = cloneString(val); } else if (sameString("priority", name)) { overrideOrWriteSelf(dy->string, name, val, "priority", keyVal, pHash, lf, f); } else if (sameString("visibility", name)) { overrideOrWriteSelf(dy->string, name, val, "visibility", keyVal, vHash, lf, f); } else { fprintf(f, "%s", dy->string); } dyStringClear(dy); } fprintf(f, "%s", dy->string); } struct slName *p, *v; boolean pWroteHead = FALSE; for (p = pList; p != NULL; p = p->next) { char *key = p->name; char *pri = hashFindVal(pHash, key); if (pri != NULL) { if (!pWroteHead) { fprintf(f, "\n#Overrides from priority.ra\n\n"); pWroteHead = TRUE; } char *text = hashFindVal(pTextHash, key); fprintf(f, "%s", text); char *vis = hashFindVal(vHash, key); if (vis != NULL) { char *vText = hashFindVal(vTextHash, key); fprintf(f, "%s", vText); } fprintf(f, "track %s 
override\n", key); fprintf(f, "priority %s\n", pri); if (vis != NULL) fprintf(f, "visibility %s\n", vis); fprintf(f, "\n"); } } boolean vWroteHead = FALSE; for (v = vList; v != NULL; v = v->next) { char *key = v->name; char *vis = hashFindVal(vHash, key); if (vis != NULL) { char *pri = hashFindVal(pHash, key); if (pri == NULL) /* Already wrote this above if has both. */ { char *text = hashFindVal(vTextHash, key); if (!vWroteHead) { fprintf(f, "\n#Overrides from visibility.ra\n\n"); vWroteHead = TRUE; } fprintf(f, "%s", text); fprintf(f, "track %s override\n", key); fprintf(f, "visibility %s\n", vis); fprintf(f, "\n"); } } } dyStringFree(&dy); carefulClose(&f); lineFileClose(&lf); } static void rewriteInsideSubdir(char *outDir, char *inDir, char *subDir, char *trackFile, char *visFile, char *priFile) /* Do some sort of rewrite on one subdirectory. */ { char tIn[PATH_LEN], vIn[PATH_LEN], pIn[PATH_LEN]; safef(tIn, sizeof(tIn), "%s/%s", inDir, trackFile); safef(vIn, sizeof(vIn), "%s/%s", inDir, visFile); safef(pIn, sizeof(pIn), "%s/%s", inDir, priFile); if (fileExists(tIn)) { if (fileExists(pIn) || fileExists(vIn)) { char tOut[PATH_LEN]; safef(tOut, sizeof(tOut), "%s/%s", outDir, trackFile); makeDirsOnPath(outDir); rewriteWithVisAndPriUpdates(tIn, pIn, vIn, tOut); } } } void doRewrite(char *outDir, char *inDir, char *trackFile, char *visFile, char *priFile) /* Do some sort of rewrite on entire system. 
*/ { struct fileInfo *org, *orgList = listDirX(inDir, "*", FALSE); for (org = orgList; org != NULL; org = org->next) { if (org->isDir) { char inOrgDir[PATH_LEN], outOrgDir[PATH_LEN]; safef(inOrgDir, sizeof(inOrgDir), "%s/%s", inDir, org->name); safef(outOrgDir, sizeof(outOrgDir), "%s/%s", outDir, org->name); rewriteInsideSubdir(outOrgDir, inOrgDir, org->name, trackFile, visFile, priFile); struct fileInfo *db, *dbList = listDirX(inOrgDir, "*", FALSE); for (db = dbList; db != NULL; db = db->next) { if (db->isDir) { char inDbDir[PATH_LEN], outDbDir[PATH_LEN]; safef(inDbDir, sizeof(inDbDir), "%s/%s", inOrgDir, db->name); safef(outDbDir, sizeof(outDbDir), "%s/%s", outOrgDir, db->name); rewriteInsideSubdir(outDbDir, inDbDir, db->name, trackFile, visFile, priFile); } } } } } unsigned getReleaseBit(char *release) /* make sure that the tag is a legal release */ { if (sameString(release, "alpha")) return RELEASE_ALPHA; if (sameString(release, "beta")) return RELEASE_BETA; if (sameString(release, "public")) return RELEASE_PUBLIC; errAbort("release must be alpha, beta, or public"); return 0; /* make compiler happy */ } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); clRoot = simplifyPathToDir(optionVal("root", clRoot)); clCheck = optionExists("check"); clStrict = optionExists("strict"); release = optionVal("release", release); releaseBit = getReleaseBit(release); clNoBlank = optionExists("noBlank"); clOneLine = optionExists("oneLine"); clRewrite = optionVal("rewrite", clRewrite); clNoCompSub = optionExists("noCompSub"); longLabelLength = optionInt("longLabelLength", longLabelLength); shortLabelLength = optionInt("shortLabelLength", shortLabelLength); if (clRewrite) { doRewrite(clRewrite, clRoot, "trackDb.ra", "visibility.ra", "priority.ra"); } else { if (argc != 2) usage(); tdbQuery(argv[1]); } return 0; }