bc9f54cc38a4f72f3a53c3e7bedccec41a1870be
max
  Mon Apr 29 06:26:58 2024 -0700
adding space and skipping long cigar strings on hgc page, refs #33292

diff --git src/lib/bamFile.c src/lib/bamFile.c
index 2a7e339..9f4f9ab 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -1,819 +1,820 @@
 /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "portable.h"
 #include "bamFile.h"
 #include "htmshell.h"
 #include "cram/cram_samtools.h"
 #include "cram/sam_header.h"
 #include "cram/cram_structs.h"
 #include "htslib/cram.h"
 #include "udc.h"
 #include "psl.h"
 
 // If KNETFILE_HOOKS is used (as recommended!), then we can simply call bam_index_load
 // without worrying about the samtools lib creating local cache files in cgi-bin:
 
 static bam_index_t *bamOpenIndexGivenUrl(samfile_t *sam, char *fileOrUrl, char *baiFileOrUrl)
 /* If fileOrUrl has a valid accompanying .bai file, parse and return the index;
  * otherwise return NULL. baiFileOrUrl can be NULL. 
  * The difference to bamOpenIndex is that the URL/filename of the bai file can be specified. */
 {
 if (sam->format.format == cram) 
     {
     if (baiFileOrUrl)
        return sam_index_load2(sam, fileOrUrl, baiFileOrUrl);
     else
        return sam_index_load(sam, fileOrUrl);
     }
 
 // assume that index is a .bai file 
 char indexName[4096];
 bam_index_t *ret = NULL;
 if (baiFileOrUrl==NULL)
     {
     // first try tacking .bai on the end of the bam file name
     safef(indexName, sizeof indexName, "%s.bai", fileOrUrl);
     if ((ret =  sam_index_load2(sam, fileOrUrl, indexName)) == NULL)
         {
         // since the open didn't work, try replacing suffix (if any) with .bai
         safef(indexName, sizeof indexName - sizeof(".bai"), "%s", fileOrUrl);
         char *lastDot = strrchr(indexName, '.');
         if (lastDot)
             {
             strcpy(lastDot, ".bai");
             ret = sam_index_load2(sam, fileOrUrl, indexName);
             }
         }
     }
 else
     ret = sam_index_load2(sam, fileOrUrl, baiFileOrUrl);
 
 return ret;
 }
 
 
 static void bamCloseIdx(bam_index_t **pIdx)
 /* Free unless already NULL. */
 {
 if (pIdx != NULL && *pIdx != NULL)
     {
     free(*pIdx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
     *pIdx = NULL;
     }
 }
 
 static bam_index_t *bamOpenIdx(samfile_t *sam, char *fileOrUrl)
 /* If fileOrUrl has a valid accompanying .bai file, parse and return the index;
  * otherwise return NULL. baiFileOrUrl can be NULL. */
 {
 return bamOpenIndexGivenUrl(sam, fileOrUrl, NULL);
 }
 
 boolean bamFileExists(char *fileOrUrl)
 /* Return TRUE if we can successfully open the bam file and its index file.
  * NOTE: this doesn't give enough diagnostics */
 {
 char *bamFileName = fileOrUrl;
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
 if (fh != NULL)
     {
     bam_index_t *idx = bamOpenIdx(fh, bamFileName);
     samclose(fh);
     if (idx == NULL)
 	{
 	warn("bamFileExists: failed to read index corresponding to %s", bamFileName);
 	return FALSE;
 	}
     bamCloseIdx(&idx);
     bamClose(&fh);
     return TRUE;
     }
 return FALSE;
 }
 
 samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
 /* Return an open bam file or errAbort (should be named bamMustOpen).
  * Return parameter if NON-null will return the file name (vestigial; long ago
  * there was a plan to use udcFuse filenames instead of URLs) */
 {
 char *bamFileName = fileOrUrl;
 if (retBamFileName != NULL)
     *retBamFileName = bamFileName;
 
 #ifdef BAM_VERSION
 // suppress too verbose messages in samtools >= 0.1.18; see redmine #6491
 // This variable didn't exist in older versions of samtools (where BAM_VERSION wasn't defined).
 bam_verbose = 1;
 #endif
 
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
 // Check both fh and fh->header; non-NULL fh can have NULL header if header doesn't parse!
 if (fh == NULL)
     {
     boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
     struct dyString *urlWarning = dyStringNew(0);
     if (usingUrl && fh == NULL)
 	{
 	dyStringAppend(urlWarning,
 		       ". If you are able to access the URL with your web browser, "
 		       "please try reloading this page.");
 	}
     errAbort("Failed to open %s%s", fileOrUrl, urlWarning->string);
     }
 return fh;
 }
 
 samfile_t *bamMustOpenLocal(char *fileName, char *mode, void *extraHeader)
 /* Open up sam or bam file or die trying.  The mode parameter is 
  *    "r" - open SAM to read
  *    "rb" - open BAM to read
  *    "w" - open SAM to write
  *    "wb" - open BAM to write
  * The extraHeader is generally NULL in the read case, and the write case
  * contains a pointer to a bam_header_t with information about the header.
  * The implementation is just a wrapper around samopen from the samtools library
  * that aborts with error message if there's a problem with the open. */
 {
 samfile_t *sf = samopen(fileName, mode, extraHeader);
 if (sf == NULL)
     errnoAbort("Couldn't open %s.\n", fileName);
 return sf;
 }
 
 void bamClose(samfile_t **pSamFile)
 /* Close down a samfile_t */
 {
 if (pSamFile != NULL)
     {
     samclose(*pSamFile);
     *pSamFile = NULL;
     }
 }
 
 void bamFileAndIndexMustExist(char *fileOrUrl, char *baiFileOrUrl)
 /* Open both a bam file and its accompanying index or errAbort; this is what it
  * takes for diagnostic info to propagate up through errCatches in calling code. */
 {
 samfile_t *bamF = bamOpen(fileOrUrl, NULL);
 bam_index_t *idx = bamOpenIndexGivenUrl(bamF, fileOrUrl, baiFileOrUrl);
 if (idx == NULL)
     errAbort("failed to read index file (.bai) corresponding to %s", fileOrUrl);
 bamCloseIdx(&idx);
 bamClose(&bamF);
 }
 
 void bamFetchAlreadyOpen(samfile_t *samfile, bam_hdr_t *header,  bam_index_t *idx, char *bamFileName, 
 			 char *position, bam_fetch_f callbackFunc, void *callbackData)
 /* With the open bam file, return items the same way with the callbacks as with bamFetch() */
 /* except in this case use an already-open bam file and index (use bam_index_load and free() for */
 /* the index). It seems a little strange to pass the filename in with the open bam, but */
 /* it's just used to report errors. */
 {
 bam1_t *b;
 AllocVar(b);
 hts_itr_t *iter = sam_itr_querys(idx, header, position);
 if (iter == NULL && startsWith("chr", position))
     iter = sam_itr_querys(idx, header, position + strlen("chr"));
 // Special case to support SARS-CoV-2: If header uses a RefSeq or GenBank accession instead of
 // our "NC_045512v2", go with that.
 if (iter == NULL && header->n_targets == 1 && startsWith("NC_045512v2", position) &&
     (startsWith("NC_045512", header->target_name[0]) ||
      startsWith("MN908947", header->target_name[0])))
     {
     char *colon = strrchr(position, ':');
     char customPos[512];
     if (colon)
         safef(customPos, sizeof customPos, "%s%s", header->target_name[0], colon);
     else
         safecpy(customPos, sizeof customPos, header->target_name[0]);
     iter = sam_itr_querys(idx, header, customPos);
     }
 
 if (iter == NULL)
     return;
 int result;
 while ((result = sam_itr_next(samfile, iter, b)) >= 0) 
     callbackFunc(b, callbackData, header);
 
 // if we're reading a CRAM file and the MD5 string has been set
 // we know there was an error finding the reference and we need
 // to request that it be loaded.
 if (samfile->format.format == cram) 
     {
     char *md5String =  cram_get_Md5(samfile);
     if (!isEmpty(md5String))
         {
         char server[4096];
         char pendingFile[4096];
         char errorFile[4096];
 
         char *refPath = cram_get_ref_url(samfile);
         char *cacheDir = cram_get_cache_dir(samfile);
 
         sprintf(server, refPath, md5String);
         sprintf(pendingFile, "%s/pending/",cacheDir);
         sprintf(errorFile, "%s/error/%s",cacheDir,md5String);
         makeDirsOnPath(pendingFile);
         sprintf(pendingFile, "%s/pending/%s",cacheDir,md5String);
         FILE *downFile;
         if ((downFile = fopen(errorFile, "r")) != NULL)
             {
             char errorBuf[4096];
             mustGetLine(downFile, errorBuf, sizeof errorBuf);
             errAbort("cannot find reference %s.  Error: %s\n", md5String,errorBuf);
             }
         else
             {
             if ((downFile = fopen(pendingFile, "w")) == NULL)
                 errAbort("cannot find reference %s.  Cannot create file %s.",md5String, pendingFile);  
             fprintf(downFile,  "%s\n", server);
             fclose(downFile);
             errAbort("Cannot find reference %s.  Downloading from %s. Please refresh screen to check download status.",md5String, server);
             }
         }
     }
 }
 
 void bamAndIndexFetchPlus(char *fileOrUrl, char *baiFileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile, char *refUrl, char *cacheDir)
 /* Open the .bam file with the .bai index specified by baiFileOrUrl.
  * baiFileOrUrl can be NULL and defaults to <fileOrUrl>.bai.
  * Fetch items in the seq:start-end position range,
  * and call callbackFunc on each bam item retrieved from the file plus callbackData.
  * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. 
  * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
  * the benefit of the callback function, with the open samFile.  */
 {
 char *bamFileName = NULL;
 samfile_t *fh = bamOpen(fileOrUrl, &bamFileName);
 if (fh->format.format == cram) 
     {
     if (cacheDir == NULL)
         errAbort("CRAM cache dir hg.conf variable (cramRef) must exist for CRAM support");
     cram_set_cache_url(fh, cacheDir, refUrl);  
     }
 bam_hdr_t *header = sam_hdr_read(fh);
 if (pSamFile != NULL)
     *pSamFile = fh;
 bam_index_t *idx = bamOpenIndexGivenUrl(fh, bamFileName, baiFileOrUrl);
 if (idx == NULL)
     warn("bam_index_load(%s) failed.", bamFileName);
 else
     {
     bamFetchAlreadyOpen(fh, header, idx, bamFileName, position, callbackFunc, callbackData);
     bamCloseIdx(&idx);
     }
 bamClose(&fh);
 }
 
 static int bamGetTargetCount(char *fileOrUrl)
 /* Return the number of target sequences in a bam or cram file. */
 {
 int tCount = 0;
 htsFile *htsF = hts_open(fileOrUrl, "r");
 if (htsF->format.format == crai)
     {
     SAM_hdr *cramHdr = cram_fd_get_header(htsF->fp.cram);
     tCount = cramHdr->nref;
     }
 else
     {
     bam_hdr_t *bamHdr = bam_hdr_read(htsF->fp.bgzf);
     tCount = bamHdr->n_targets;
     }
 hts_close(htsF);
 return tCount;
 }
 
 long long bamFileItemCount(char *fileOrUrl, char *baiFileOrUrl)
 /* Return the total number of mapped items across all sequences in fileOrUrl, using index file.
  * If baiFileOrUrl is NULL, the index file is assumed to be fileOrUrl.bai.
  * NOTE: not all bam index files include mapped item counts, so this may return 0 even for large
  * bam.  As of May 2019, our copy of hts_idx_get_stat does not support cram indexes
  * (perhaps they never include counts?), so this always returns 0 for cram. */
 {
 long long itemCount = 0;
 hts_idx_t *idx = NULL;
 if (isNotEmpty(baiFileOrUrl))
     idx = hts_idx_load2(fileOrUrl, baiFileOrUrl);
 else
     {
     int format = endsWith(fileOrUrl, ".cram") ? HTS_FMT_CRAI : HTS_FMT_BAI;
     idx = hts_idx_load(fileOrUrl, format);
     }
 if (idx == NULL)
     warn("bamFileItemCount: hts_idx_load(%s) failed.", baiFileOrUrl ? baiFileOrUrl : fileOrUrl);
 else
     {
     int tCount = bamGetTargetCount(fileOrUrl);
     int tid;
     for (tid = 0;  tid < tCount;  tid++)
         {
         uint64_t mapped, unmapped;
         int ret = hts_idx_get_stat(idx, tid, &mapped, &unmapped);
         if (ret == 0)
             itemCount += mapped;
         // ret is -1 if counts are unavailable.
         }
     hts_idx_destroy(idx);
     idx = NULL;
     }
 return itemCount;
 }
 
 void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile, char *refUrl, char *cacheDir)
 {
 bamAndIndexFetchPlus(fileOrUrl, NULL, position, callbackFunc, callbackData, pSamFile, refUrl, cacheDir);
 }
 
 void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile)
 {
 bamFetchPlus(fileOrUrl, position, callbackFunc, callbackData, pSamFile,
     NULL, NULL);
 }
 
 boolean bamIsRc(const bam1_t *bam)
 /* Return TRUE if alignment is on - strand. */
 {
 const bam1_core_t *core = &bam->core;
 return (core->flag & BAM_FREVERSE);
 }
 
 void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
 /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
  * the beginning of the query sequence and quality; likewise for retHigh at end.
  * For convenience, retClippedQLen is the original query length minus soft clipping
  * (and the length of the query sequence that will be returned). */
 {
 unsigned int *cigarPacked = bam1_cigar(bam);
 const bam1_core_t *core = &bam->core;
 char op;
 int n = bamUnpackCigarElement(cigarPacked[0], &op);
 int low = (op == 'S') ? n : 0;
 n = bamUnpackCigarElement(cigarPacked[core->n_cigar-1], &op);
 int high = (op == 'S') ? n : 0;
 if (retLow != NULL)
     *retLow = low;
 if (retHigh != NULL)
     *retHigh = high;
 if (retClippedQLen != NULL)
     *retClippedQLen = (core->l_qseq - low - high);
 }
 
 
 void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq)
 /* Fill in qSeq with the nucleotide sequence encoded in bam.  The BAM format 
  * reverse-complements query sequence when the alignment is on the - strand,
  * so if useStrand is given we rev-comp it back to restore the original query 
  * sequence. */
 {
 const bam1_core_t *core = &bam->core;
 int qLen = core->l_qseq;
 uint8_t *packedQSeq = bam1_seq(bam);
 int i;
 for (i = 0; i < qLen; i++)
     qSeq[i] = bam_nt16_rev_table[bam1_seqi(packedQSeq, i)];
 qSeq[i] = '\0';
 if (useStrand && bamIsRc(bam))
     reverseComplement(qSeq, qLen);
 }
 
 char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand)
 /* Allocate and return the nucleotide sequence encoded in bam.  The BAM format 
  * reverse-complements query sequence when the alignment is on the - strand,
  * so if useStrand is given we rev-comp it back to restore the original query 
  * sequence. */
 {
 const bam1_core_t *core = &bam->core;
 int qLen = core->l_qseq;
 char *qSeq = needMem(qLen+1);
 bamUnpackQuerySequence(bam, useStrand, qSeq);
 return qSeq;
 }
 
 UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand)
 /* Return the base quality scores encoded in bam as an array of ubytes. */
 {
 const bam1_core_t *core = &bam->core;
 int qLen = core->l_qseq;
 UBYTE *arr = needMem(qLen);
 boolean isRc = useStrand && bamIsRc(bam);
 UBYTE *qualStr = bam1_qual(bam);
 int i;
 for (i = 0;  i < core->l_qseq;  i++)
     {
     int offset = isRc ? (qLen - 1 - i) : i;
     arr[i] = (qualStr[0] == 255) ? 255 : qualStr[offset];
     }
 return arr;
 }
 
 void bamUnpackCigar(const bam1_t *bam, struct dyString *dyCigar)
 /* Unpack CIGAR string into dynamic string */
 {
 unsigned int *cigarPacked = bam1_cigar(bam);
 const bam1_core_t *core = &bam->core;
 int i;
 for (i = 0;  i < core->n_cigar;  i++)
     {
     char op;
     int n = bamUnpackCigarElement(cigarPacked[i], &op);
     dyStringPrintf(dyCigar, "%d", n);
     dyStringAppendC(dyCigar, op);
+    dyStringAppendC(dyCigar, ' ');
     }
 }
 
 char *bamGetCigar(const bam1_t *bam)
 /* Return a BAM-enhanced CIGAR string, decoded from the packed encoding in bam. */
 {
 const bam1_core_t *core = &bam->core;
 struct dyString *dyCigar = dyStringNew(min(8, core->n_cigar*4));
 bamUnpackCigar(bam, dyCigar);
 return dyStringCannibalize(&dyCigar);
 }
 
 void bamShowCigarEnglish(const bam1_t *bam)
 /* Print out cigar in English e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match" */
 {
 unsigned int *cigarPacked = bam1_cigar(bam);
 const bam1_core_t *core = &bam->core;
 int i;
 for (i = 0;  i < core->n_cigar;  i++)
     {
     char op;
     int n = bamUnpackCigarElement(cigarPacked[i], &op);
     if (i > 0)
 	printf(", ");
     switch (op)
 	{
 	case 'M': // match or mismatch (gapless aligned block)
 	    printf("%d (mis)Match", n);
 	    break;
 	case '=': // match
 	    printf("%d Match", n);
 	    break;
 	case 'X': // mismatch
 	    printf("%d Mismatch", n);
 	    break;
 	case 'I': // inserted in query
 	    printf("%d Insertion", n);
 	    break;
 	case 'S': // skipped query bases at beginning or end ("soft clipping")
 	    printf("%d Skipped", n);
 	    break;
 	case 'D': // deleted from query
 	    printf("%d Deletion", n);
 	    break;
 	case 'N': // long deletion from query (intron as opposed to small del)
 	    printf("%d deletioN", n);
 	    break;
 	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
 	    printf("%d Hard clipped query", n);
 	    break;
 	case 'P': // P="silent deletion from padded reference sequence"
 	    printf("%d Padded / silent deletion", n);
 	    break;
 	default:
 	    errAbort("bamShowCigarEnglish: unrecognized CIGAR op %c -- update me", op);
 	}
     }
 }
 
 static void descFlag(unsigned flag, unsigned bitMask, char *desc, boolean makeRed,
 	      boolean *retFirst)
 /* Describe a flag bit (or multi-bit mask) if it is set in flag. */
 {
 if ((flag & bitMask) == bitMask) // *all* bits in bitMask are set in flag
     {
     if (!*retFirst)
 	printf(" | ");
     printf("<span%s>(<TT>0x%02x</TT>) %s</span>",
 	   (makeRed ? " style='color: red'" : ""), bitMask, desc);
     *retFirst = FALSE;
     }
 }
 
 void bamShowFlagsEnglish(const bam1_t *bam)
 /* Print out flags in English, e.g. "Mate is on '-' strand; Properly paired". */
 {
 const bam1_core_t *core = &bam->core;
 unsigned flag = core->flag;
 boolean first = TRUE;
 descFlag(flag, BAM_FDUP, "Optical or PCR duplicate", TRUE, &first);
 descFlag(flag, BAM_FQCFAIL, "QC failure", TRUE, &first);
 descFlag(flag, BAM_FSECONDARY, "Not primary alignment", TRUE, &first);
 descFlag(flag, BAM_FREAD2, "Read 2 of pair", FALSE, &first);
 descFlag(flag, BAM_FREAD1, "Read 1 of pair", FALSE, &first);
 descFlag(flag, BAM_FMREVERSE, "Mate is on '-' strand", FALSE, &first);
 descFlag(flag, BAM_FREVERSE, "Read is on '-' strand", FALSE, &first);
 descFlag(flag, BAM_FMUNMAP, "Mate is unmapped", TRUE, &first);
 if (flag & BAM_FUNMAP)
     errAbort("Read is unmapped (what is it doing here?!?)");
 descFlag(flag, (BAM_FPROPER_PAIR | BAM_FPAIRED), "Properly paired", FALSE, &first);
 if ((flag & BAM_FPAIRED) && !(flag & BAM_FPROPER_PAIR))
     descFlag(flag, BAM_FPAIRED, "Not properly paired", TRUE, &first);
 }
 
 int bamGetTargetLength(const bam1_t *bam)
 /* Tally up the alignment's length on the reference sequence from
  * bam's packed-int CIGAR representation. */
 {
 unsigned int *cigarPacked = bam1_cigar(bam);
 const bam1_core_t *core = &bam->core;
 int tLength=0;
 int i;
 for (i = 0;  i < core->n_cigar;  i++)
     {
     char op;
     int n = bamUnpackCigarElement(cigarPacked[i], &op);
     switch (op)
 	{
 	case 'M': // match or mismatch (gapless aligned block)
 	case '=': // match
 	case 'X': // mismatch
 	    tLength += n;
 	    break;
 	case 'I': // inserted in query
 	    break;
 	case 'D': // deleted from query
 	case 'N': // long deletion from query (intron as opposed to small del)
 	    tLength += n;
 	    break;
 	case 'S': // skipped query bases at beginning or end ("soft clipping")
 	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
 	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
 	    break;
 	default:
 	    errAbort("bamGetTargetLength: unrecognized CIGAR op %c -- update me", op);
 	}
     }
 return tLength;
 }
 
 bam1_t *bamClone(const bam1_t *bam)
 /* Return a newly allocated copy of bam. */
 {
 // Using typecasts to get around compiler complaints about bam being const:
 bam1_t *newBam = cloneMem((void *)bam, sizeof(*bam));
 newBam->data = cloneMem((void *)bam->data, bam->data_len*sizeof(bam->data[0]));
 return newBam;
 }
 
 static inline int typeSize(int x)
 /* Return size of basic BAM types. */
 {
 if (x == 'C' || x == 'c' || x == 'A') return 1;
 else if (x == 'S' || x == 's') return 2;
 else if (x == 'I' || x == 'i' || x == 'f' || x == 'F') return 4;
 else return 0;
 }      
 
 static void printType(uint8_t type, uint8_t **str)
 /* Print out a string that has a value of the given type. */
 {
 uint8_t *s = *str;
 if (type == 'A') { printf("%c", *s); ++s; }
 else if (type == 'C') { printf("%u", *s); ++s; }
 else if (type == 'c') { printf("%d", *(int8_t*)s); ++s; }
 else if (type == 'S') { printf("%u", *(uint16_t*)s); s += 2; }
 else if (type == 's') { printf("%d", *(int16_t*)s); s += 2; }
 else if (type == 'I') { printf("%u", *(uint32_t*)s); s += 4; }
 else if (type == 'i') { printf("%d", *(int32_t*)s); s += 4; }
 else if (type == 'f') { printf("%g", *(float*)s); s += 4; }
 else if (type == 'd') { printf("%lg", *(double*)s); s += 8; }
 else if (type == 'B')
     {
     uint8_t subType =  *s++;
     int count =  *(int32_t*)s;
     s += 4;
 
     int ii;
     for (ii=0; ii < count; ii++)
         {
         printType(subType, &s);
         if (ii < count - 1)
             printf(",");
         }
     }
 else if (type == 'Z' || type == 'H')
     {
     htmTextOut(stdout, (char *)s);
     s += strlen((char *)s) + 1;
     }
 else
     errAbort("missing BAM type %c", type);
 *str = s;
 }
 
 void bamShowTags(const bam1_t *bam)
 /* Print out tags in HTML: bold key, no type indicator for brevity. */
 {
 // adapted from part of bam.c bam_format1:
 uint8_t *s = bam1_aux(bam);
 while (s < bam->data + bam->data_len)
     {
     uint8_t type, key[2];
     key[0] = s[0]; key[1] = s[1];
     s += 2; type = *s; ++s;
     printf(" <B>%c%c</B>:", key[0], key[1]);
     printType(type, &s);
     }
 putc('\n', stdout);
 }
 
 char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize)
 /* If bam's tags include the given 2-character tag, place the value into 
  * buf (zero-terminated, trunc'd if nec) and return a pointer to buf,
  * or NULL if tag is not present. */
 {
 if (tag == NULL)
     errAbort("NULL tag passed to bamGetTagString");
 if (! (isalpha(tag[0]) && isalnum(tag[1]) && tag[2] == '\0'))
     errAbort("bamGetTagString: invalid tag '%s'", htmlEncode(tag));
 char *val = NULL;
 // adapted from part of bam.c bam_format1:
 uint8_t *s = bam1_aux(bam);
 while (s < bam->data + bam->data_len)
     {
     uint8_t type, key[2];
     key[0] = s[0]; key[1] = s[1];
     s += 2; type = *s; ++s;
     if (key[0] == tag[0] && key[1] == tag[1])
 	{
 	if (type == 'A') { snprintf(buf, bufSize, "%c", *s);}
 	else if (type == 'C') { snprintf(buf, bufSize, "%u", *s); }
 	else if (type == 'c') { snprintf(buf, bufSize, "%d", *s); }
 	else if (type == 'S') { snprintf(buf, bufSize, "%u", *(uint16_t*)s); }
 	else if (type == 's') { snprintf(buf, bufSize, "%d", *(int16_t*)s); }
 	else if (type == 'I') { snprintf(buf, bufSize, "%u", *(uint32_t*)s); }
 	else if (type == 'i') { snprintf(buf, bufSize, "%d", *(int32_t*)s); }
 	else if (type == 'f') { snprintf(buf, bufSize, "%g", *(float*)s); }
 	else if (type == 'd') { snprintf(buf, bufSize, "%lg", *(double*)s); }
 	else if (type == 'Z' || type == 'H') strncpy(buf, (char *)s, bufSize);
 	else buf[0] = '\0';
 	buf[bufSize-1] = '\0'; // TODO: is this nec?? see man pages
 	val = buf;
 	break;
 	}
     else
 	{
 	if (type == 'A' || type == 'C' || type == 'c') { ++s; }
 	else if (type == 'S' || type == 's') { s += 2; }
 	else if (type == 'I' || type == 'i' || type == 'f') { s += 4; }
 	else if (type == 'd') { s += 8; }
 	else if (type == 'B') 
             {
             // 5 is for type byte and a following int32
             s += 5 + typeSize(*(s)) * (*(int32_t*)((s)+1));
             }
 	else if (type == 'Z' || type == 'H')
 	    {
 	    while (*s++);
 	    }
 	}
     }
 return val;
 }
 
 void bamUnpackAux(const bam1_t *bam, struct dyString *dy)
 /* Unpack the tag:type:val part of bam into dy */
 {
 // adapted from part of bam.c bam_format1:
 uint8_t *s = bam1_aux(bam);
 boolean firstTime = TRUE;
 while (s < bam->data + bam->data_len)
     {
     if (firstTime)
         firstTime = FALSE;
     else
         dyStringAppendC(dy, '\t');
     dyStringAppendC(dy, *s++);
     dyStringAppendC(dy, *s++);
     dyStringAppendC(dy, ':');
     dyStringAppendC(dy, s[0]);
     dyStringAppendC(dy, ':');
     uint8_t type = *s++;
     if (type == 'A') { dyStringPrintf(dy, "%c", *s); ++s; }
     else if (type == 'C') { dyStringPrintf(dy, "%u", *s); ++s; }
     else if (type == 'c') { dyStringPrintf(dy, "%d", *s); ++s; }
     else if (type == 'S') { dyStringPrintf(dy, "%u", *(uint16_t*)s); s += 2; }
     else if (type == 's') { dyStringPrintf(dy, "%d", *(int16_t*)s); s += 2; }
     else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; }
     else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; }
     else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; }
     else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; }
     else if (type == 'Z' || type == 'H')
 	{
 	dyStringAppend(dy, (char *)s);
 	s += strlen((char *)s) + 1;
 	}
     }
 }
 
 struct psl *bamToPslUnscored2(const bam1_t *bam, const bam_hdr_t *hdr, int inclHardClipped)
 /* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords,
  * no scoring info).  If inclHardClipped is True, the size of the hard-clipped regions
  * are include in the PSL.  This is required for supplementary alignments to have the
  * correct query sizes and offsets.
  */
 {
 const bam1_core_t *core = &bam->core;
 
 if (core->tid == -1)
     return NULL;
 
 struct psl *psl;
 AllocVar(psl);
 boolean isRc = (core->flag & BAM_FREVERSE);
 psl->strand[0] = isRc ? '-' : '+';
 psl->qName = cloneString(bam1_qname(bam));
 psl->tName = cloneString(hdr->target_name[core->tid]);
 unsigned blockCount = 0;
 unsigned *blockSizes, *qStarts, *tStarts;
 AllocArray(blockSizes, core->n_cigar);
 AllocArray(qStarts, core->n_cigar);
 AllocArray(tStarts, core->n_cigar);
 int tPos = core->pos, qPos = 0, qLength = 0;
 unsigned int *cigar = bam1_cigar(bam);
 int i;
 for (i = 0;  i < core->n_cigar;  i++)
     {
     char op;
     int n = bamUnpackCigarElement(cigar[i], &op);
     switch (op)
 	{
 	case 'X': // mismatch (gapless aligned block)
 	case '=': // match (gapless aligned block)
 	case 'M': // match or mismatch (gapless aligned block)
 	    blockSizes[blockCount] = n;
 	    qStarts[blockCount] = qPos;
 	    tStarts[blockCount] = tPos;
 	    blockCount++;
 	    tPos += n;
 	    qPos += n;
 	    qLength += n;
             if (op == 'X')
                psl->misMatch += n;
             else
                psl->match += n;
 	    break;
 	case 'I': // inserted in query
 	    qPos += n;
 	    qLength += n;
 	    break;
 	case 'D': // deleted from query
 	case 'N': // long deletion from query (intron as opposed to small del)
 	    tPos += n;
 	    break;
 	case 'S': // skipped query bases at beginning or end ("soft clipping")
 	    qPos += n;
 	    qLength += n;
 	    break;
 	case 'H': // note query bases not stored in record's query sequence ("hard clipping")
             if (inclHardClipped)
                 {
                 qPos += n;
                 qLength += n;
                 }
 	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
 	    break;
 	default:
 	    errAbort("bamToPsl: unrecognized CIGAR op %c -- update me", op);
 	}
     }
 
 if (blockCount == 0)
     {
     pslFree(&psl);
     return NULL;
     }
 
 psl->tSize = hdr->target_len[core->tid];
 psl->tStart = tStarts[0];
 psl->tEnd = tStarts[blockCount-1] + blockSizes[blockCount-1];
 psl->qSize = qLength;
 psl->qStart = qStarts[0];
 psl->qEnd = qStarts[blockCount-1] + blockSizes[blockCount-1];
 if (isRc)
     reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
 psl->blockCount = blockCount;
 psl->blockSizes = blockSizes;
 psl->qStarts = qStarts;
 psl->tStarts = tStarts;
 pslComputeInsertCounts(psl);
 return psl;
 }
 
 struct psl *bamToPslUnscored(const bam1_t *bam, const bam_hdr_t *hdr)
 /* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords,
  * no scoring info) */
 {
 return bamToPslUnscored2(bam, hdr, FALSE);
 }