git: c750cbac5801 - main - biology/checkm: Fix build with setuptools 58.0.0+
Date: Fri, 25 Mar 2022 13:49:36 UTC
The branch main has been updated by sunpoet:

URL: https://cgit.FreeBSD.org/ports/commit/?id=c750cbac580143e48a23ed5af71ac671614b8015

commit c750cbac580143e48a23ed5af71ac671614b8015
Author:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2022-03-25 13:32:00 +0000
Commit:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2022-03-25 13:38:04 +0000

    biology/checkm: Fix build with setuptools 58.0.0+

    With hat: python
---
 biology/checkm/Makefile         |   2 +-
 biology/checkm/files/patch-2to3 | 648 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 649 insertions(+), 1 deletion(-)

diff --git a/biology/checkm/Makefile b/biology/checkm/Makefile
index 302c1ba374d0..6bb02b8266de 100644
--- a/biology/checkm/Makefile
+++ b/biology/checkm/Makefile
@@ -15,7 +15,7 @@ RUN_DEPENDS=	${PYNUMPY} \
 		${PYTHON_PKGNAMEPREFIX}pysam>=0.8.3:biology/py-pysam@${PY_FLAVOR} \
 		${PYTHON_PKGNAMEPREFIX}scipy>=0.9.0:science/py-scipy@${PY_FLAVOR}
 
-USES=		python:3.7+
+USES=		dos2unix python:3.7+
 USE_GITHUB=	yes
 GH_ACCOUNT=	Ecogenomics
 USE_PYTHON=	distutils noflavors autoplist
diff --git a/biology/checkm/files/patch-2to3 b/biology/checkm/files/patch-2to3
new file mode 100644
index 000000000000..04b37972c86e
--- /dev/null
+++ b/biology/checkm/files/patch-2to3
@@ -0,0 +1,648 @@
+--- checkm/binTools.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/binTools.py
+@@ -26,7 +26,7 @@ import gzip
+ 
+ import numpy as np
+ 
+-from common import binIdFromFilename, checkFileExists, readDistribution, findNearest
++from .common import binIdFromFilename, checkFileExists, readDistribution, findNearest
+ from checkm.util.seqUtils import readFasta, writeFasta, baseCount
+ from checkm.genomicSignatures import GenomicSignatures
+ from checkm.prodigal import ProdigalGeneFeatureParser
+@@ -123,34 +123,34 @@ class BinTools():
+                 seqId = line[1:].split(None, 1)[0]
+ 
+                 if seqId in seqIds:
+-                    print ' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId)
++                    print(' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId))
+                 seqIds.add(seqId)
+ 
+             binSeqs[binId] = seqIds
+ 
+         # check for sequences assigned to multiple bins
+         bDuplicates = False
+-        binIds = binSeqs.keys()
+-        for i in xrange(0, len(binIds)):
+-            for j in xrange(i + 1, len(binIds)):
++        binIds = list(binSeqs.keys())
++        for i in range(0, len(binIds)):
++            for j in range(i + 1, len(binIds)):
+                 seqInter = set(binSeqs[binIds[i]]).intersection(set(binSeqs[binIds[j]]))
+ 
+                 if len(seqInter) > 0:
+                     bDuplicates = True
+-                    print ' Sequences shared between %s and %s: ' % (binIds[i], binIds[j])
++                    print(' Sequences shared between %s and %s: ' % (binIds[i], binIds[j]))
+                     for seqId in seqInter:
+-                        print ' ' + seqId
+-                    print ''
++                        print(' ' + seqId)
++                    print('')
+ 
+         if not bDuplicates:
+-            print ' No sequences assigned to multiple bins.'
++            print(' No sequences assigned to multiple bins.')
+ 
+     def gcDist(self, seqs):
+         """GC statistics for bin."""
+         GCs = []
+         gcTotal = 0
+         basesTotal = 0
+-        for _, seq in seqs.iteritems():
++        for _, seq in seqs.items():
+             a, c, g, t = baseCount(seq)
+             gc = g + c
+             bases = a + c + g + t
+@@ -171,7 +171,7 @@
+ 
+         codingBasesTotal = 0
+         basesTotal = 0
+-        for seqId, seq in seqs.iteritems():
++        for seqId, seq in seqs.items():
+             codingBases = prodigalParser.codingBases(seqId)
+ 
+             CDs.append(float(codingBases) / len(seq))
+@@ -186,11 +186,11 @@ def binTetraSig(self, seqs, tetraSigs):
+         """Tetranucleotide signature for bin.
+         """
+         binSize = 0
+-        for _, seq in seqs.iteritems():
++        for _, seq in seqs.items():
+             binSize += len(seq)
+ 
+         bInit = True
+-        for seqId, seq in seqs.iteritems():
++        for seqId, seq in seqs.items():
+             weightedTetraSig = tetraSigs[seqId] * (float(len(seq)) / binSize)
+             if bInit:
+                 binSig = weightedTetraSig
+@@ -247,32 +247,32 @@
+         meanCD, deltaCDs, CDs = self.codingDensityDist(seqs, prodigalParser)
+ 
+         # find keys into GC and CD distributions
+-        closestGC = findNearest(np.array(gcBounds.keys()), meanGC)
+-        sampleSeqLen = gcBounds[closestGC].keys()[0]
++        closestGC = findNearest(np.array(list(gcBounds.keys())), meanGC)
++        sampleSeqLen = list(gcBounds[closestGC].keys())[0]
+         d = gcBounds[closestGC][sampleSeqLen]
+-        gcLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
+-        gcUpperBoundKey = findNearest(d.keys(), (100 + distribution) / 2.0)
++        gcLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
++        gcUpperBoundKey = findNearest(list(d.keys()), (100 + distribution) / 2.0)
+ 
+-        closestCD = findNearest(np.array(cdBounds.keys()), meanCD)
+-        sampleSeqLen = cdBounds[closestCD].keys()[0]
++        closestCD = findNearest(np.array(list(cdBounds.keys())), meanCD)
++        sampleSeqLen = list(cdBounds[closestCD].keys())[0]
+         d = cdBounds[closestCD][sampleSeqLen]
+-        cdLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0)
++        cdLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0)
+ 
+-        tdBoundKey = findNearest(tdBounds[tdBounds.keys()[0]].keys(), distribution)
++        tdBoundKey = findNearest(list(tdBounds[list(tdBounds.keys())[0]].keys()), distribution)
+ 
+         index = 0
+-        for seqId, seq in seqs.iteritems():
++        for seqId, seq in seqs.items():
+             seqLen = len(seq)
+ 
+             # find GC, CD, and TD bounds
+-            closestSeqLen = findNearest(gcBounds[closestGC].keys(), seqLen)
++            closestSeqLen = findNearest(list(gcBounds[closestGC].keys()), seqLen)
+             gcLowerBound = gcBounds[closestGC][closestSeqLen][gcLowerBoundKey]
+             gcUpperBound = gcBounds[closestGC][closestSeqLen][gcUpperBoundKey]
+ 
+-            closestSeqLen = findNearest(cdBounds[closestCD].keys(), seqLen)
++            closestSeqLen = findNearest(list(cdBounds[closestCD].keys()), seqLen)
+             cdLowerBound = cdBounds[closestCD][closestSeqLen][cdLowerBoundKey]
+ 
+-            closestSeqLen = findNearest(tdBounds.keys(), seqLen)
++            closestSeqLen = findNearest(list(tdBounds.keys()), seqLen)
+             tdBound = tdBounds[closestSeqLen][tdBoundKey]
+ 
+             outlyingDists = []
+--- checkm/checkmData.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/checkmData.py
+@@ -85,11 +85,11 @@ class DBConfig(object):
+         """Work out if we have permission to write to the CheckM config before attempting to make changes"""
+         try:
+             open(self.configFile, 'a')
+-        except IOError, e:
+-            print "You do not seem to have permission to edit the checkm config file"
+-            print "located at %s" % self.configFile
+-            print "Please try again with updated privileges. Error was:\n"
+-            print e
++        except IOError as e:
++            print("You do not seem to have permission to edit the checkm config file")
++            print("located at %s" % self.configFile)
++            print("Please try again with updated privileges. Error was:\n")
++            print(e)
+             return False
+         return True
+ 
+@@ -167,28 +167,28 @@ class DBManager(mm.ManifestManager):
+         else:
+             path = os.path.abspath(os.path.expanduser(path))
+ 
+-            print ""
++            print("")
+             if os.path.exists(path):
+                 # path exists
+                 if os.access(path, os.W_OK):
+                     # path is writable
+                     path_set = True
+-                    print "Path [%s] exists and you have permission to write to this folder." % path
++                    print("Path [%s] exists and you have permission to write to this folder." % path)
+                 else:
+-                    print "Path [%s] exists but you do not have permission to write to this folder." % path
++                    print("Path [%s] exists but you do not have permission to write to this folder." % path)
+             else:
+                 # path does not exist, try to make it
+                 "Path [%s] does not exist so I will attempt to create it" % path
+                 try:
+                     self.makeSurePathExists(path)
+-                    print "Path [%s] has been created and you have permission to write to this folder." % path
++                    print("Path [%s] has been created and you have permission to write to this folder." % path)
+                     path_set = True
+                 except Exception:
+-                    print "Unable to make the folder, Error was: %s" % sys.exc_info()[0]
++                    print("Unable to make the folder, Error was: %s" % sys.exc_info()[0])
+                     minimal = True
+ 
+         # (re)make the manifest file
+-        print "(re) creating manifest file (please be patient)."
++        print("(re) creating manifest file (please be patient).")
+         self.createManifest(path, self.config.values["localManifestName"])
+ 
+         return path
+@@ -196,8 +196,8 @@ class DBManager(mm.ManifestManager):
+     def checkPermissions(self):
+         """See if the user has permission to write to the data directory"""
+         if not os.access(self.config.values["dataRoot"], os.W_OK):
+-            print "You do not seem to have permission to edit the CheckM data folder"
+-            print "located at %s" % self.config.values["dataRoot"]
++            print("You do not seem to have permission to edit the CheckM data folder")
++            print("located at %s" % self.config.values["dataRoot"])
+             return False
+ 
+         return True
+--- checkm/coverage.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/coverage.py
+@@ -62,7 +62,7 @@ class Coverage():
+             binId = binIdFromFilename(binFile)
+ 
+             seqs = readFasta(binFile)
+-            for seqId, seq in seqs.iteritems():
++            for seqId, seq in seqs.items():
+                 seqIdToBinId[seqId] = binId
+                 seqIdToSeqLen[seqId] = len(seq)
+ 
+@@ -97,12 +97,12 @@ class Coverage():
+             print(header)
+ 
+         # get length of all seqs
+-        for bamFile, seqIds in coverageInfo.iteritems():
+-            for seqId in seqIds.keys():
++        for bamFile, seqIds in coverageInfo.items():
++            for seqId in list(seqIds.keys()):
+                 seqIdToSeqLen[seqId] = seqIds[seqId].seqLen
+ 
+         # write coverage stats for all scaffolds to file
+-        for seqId, seqLen in seqIdToSeqLen.iteritems():
++        for seqId, seqLen in seqIdToSeqLen.items():
+             rowStr = seqId + '\t' + seqIdToBinId.get(seqId, DefaultValues.UNBINNED) + '\t' + str(seqLen)
+             for bamFile in bamFiles:
+                 bamId = binIdFromFilename(bamFile)
+@@ -171,7 +171,7 @@ class Coverage():
+                 writeProc.join()
+             except:
+                 # make sure all processes are terminated
+-                print traceback.format_exc()
++                print(traceback.format_exc())
+                 for p in workerProc:
+                     p.terminate()
+ 
+@@ -271,16 +271,16 @@ class Coverage():
+         if self.logger.getEffectiveLevel() <= logging.INFO:
+             sys.stderr.write('\n')
+ 
+-        print ''
+-        print ' # total reads: %d' % totalReads
+-        print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
+-        print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
+-        print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
+-        print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
+-        print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
+-        print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
+-        print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
+-        print ''
++        print('')
++        print(' # total reads: %d' % totalReads)
++        print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
++        print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
++        print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
++        print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
++        print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
++        print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
++        print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
++        print('')
+ 
+     def parseCoverage(self, coverageFile):
+         """Read coverage information from file."""
+@@ -301,7 +301,7 @@ class Coverage():
+             if seqId not in coverageStats[binId]:
+                 coverageStats[binId][seqId] = {}
+ 
+-            for i in xrange(3, len(lineSplit), 3):
++            for i in range(3, len(lineSplit), 3):
+                 bamId = lineSplit[i]
+                 coverage = float(lineSplit[i + 1])
+                 coverageStats[binId][seqId][bamId] = coverage
+@@ -325,7 +325,7 @@ class Coverage():
+ 
+             # calculate mean coverage (weighted by scaffold length)
+             # for each bin under each BAM file
+-            for i in xrange(3, len(lineSplit), 3):
++            for i in range(3, len(lineSplit), 3):
+                 bamId = lineSplit[i]
+                 coverage = float(lineSplit[i + 1])
+                 binCoverages[binId][bamId].append(coverage)
+@@ -341,13 +341,13 @@ class Coverage():
+ 
+         profiles = defaultdict(dict)
+         for binId in binStats:
+-            for bamId, stats in binStats[binId].iteritems():
++            for bamId, stats in binStats[binId].items():
+                 binLength, meanBinCoverage = stats
+                 coverages = binCoverages[binId][bamId]
+ 
+                 varCoverage = 0
+                 if len(coverages) > 1:
+-                    varCoverage = mean(map(lambda x: (x - meanBinCoverage) ** 2, coverages))
++                    varCoverage = mean([(x - meanBinCoverage) ** 2 for x in coverages])
+ 
+                 profiles[binId][bamId] = [meanBinCoverage, sqrt(varCoverage)]
+ 
+--- checkm/coverageWindows.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/coverageWindows.py
+@@ -188,10 +188,10 @@ class CoverageWindows():
+                 try:
+                     end += windowSize
+                 except:
+-                    print '*****************'
+-                    print end
+-                    print windowSize
+-                    print '******************'
++                    print('*****************')
++                    print(end)
++                    print(windowSize)
++                    print('******************')
+ 
+                 coverage = float(sum(readLoader.coverage)) / seqLen
+ 
+@@ -239,13 +239,13 @@ class CoverageWindows():
+         if self.logger.getEffectiveLevel() <= logging.INFO:
+             sys.stderr.write('\n')
+ 
+-        print ''
+-        print ' # total reads: %d' % totalReads
+-        print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)
+-        print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)
+-        print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)
+-        print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)
+-        print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)
+-        print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)
+-        print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)
+-        print ''
++        print('')
++        print(' # total reads: %d' % totalReads)
++        print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads))
++        print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads))
++        print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads))
++        print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads))
++        print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads))
++        print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads))
++        print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads))
++        print('')
+--- checkm/manifestManager.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/manifestManager.py
+@@ -47,8 +47,8 @@ __MANIFEST__ = ".dmanifest"
+ # system includes
+ import os
+ import hashlib
+-import urllib2
+-import urllib
++import urllib.request, urllib.error, urllib.parse
++import urllib.request, urllib.parse, urllib.error
+ import shutil
+ import errno
+ 
+@@ -121,15 +121,15 @@ class ManifestManager(object):
+         source = ""
+         # first we assume it is remote
+         try:
+-            s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
++            s_man = urllib.request.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
+             source = sourceManifestLocation + "/"
+         except ValueError:
+             # then it is probably a file
+             s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
+             source = os.path.join(sourceManifestLocation) + os.path.sep
+-        except urllib2.URLError:
++        except urllib.error.URLError:
+             # problems connecting to server, perhaps user is behind a proxy or firewall
+-            print "Error: failed to connect to server."
++            print("Error: failed to connect to server.")
+             return (None, None, None, None, None)
+ 
+         first_line = True
+@@ -140,11 +140,11 @@ class ManifestManager(object):
+                     # get the type of the manifest
+                     s_type = self.getManType(line)
+                     if s_type != l_type:
+-                        print "Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type)
++                        print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
+                         return (None, None, None, None, None)
+                 else:
+                     # no type specified
+-                    print "Error: type of source manifest is not specified. Is this a valid manifest file?"
++                    print("Error: type of source manifest is not specified. Is this a valid manifest file?")
+                     return (None, None, None, None, None)
+ 
+                 self.type = l_type
+@@ -174,7 +174,7 @@ class ManifestManager(object):
+                     deleted.append(fields[0])
+ 
+         # check for new files
+-        for f in source_man.keys():
++        for f in list(source_man.keys()):
+             if source_man[f][2] == False:
+                 if source_man[f][0] == '-':
+                     addedDirs.append(f)
+@@ -190,28 +190,28 @@ class ManifestManager(object):
+                     modified_size += int(source_man[f][1])
+ 
+         if len(addedFiles) > 0:
+-            print "#------------------------------------------------------"
+-            print "# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size))
++            print("#------------------------------------------------------")
++            print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
+             for f in addedFiles:
+-                print "\t".join([self.formatData(int(source_man[f][1])), f])
++                print("\t".join([self.formatData(int(source_man[f][1])), f]))
+ 
+         if len(addedDirs) > 0:
+-            print "#------------------------------------------------------"
+-            print "# Source contains %d new folders(s)" % (len(addedDirs))
++            print("#------------------------------------------------------")
++            print("# Source contains %d new folders(s)" % (len(addedDirs)))
+             for f in addedDirs:
+-                print f
++                print(f)
+ 
+         if len(modified) > 0:
+-            print "#------------------------------------------------------"
+-            print "# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size))
++            print("#------------------------------------------------------")
++            print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
+             for f in modified:
+-                print f
++                print(f)
+ 
+         if len(deleted) > 0:
+-            print "#------------------------------------------------------"
+-            print "# %d files have been deleted in the source:" % len(deleted)
++            print("#------------------------------------------------------")
++            print("# %d files have been deleted in the source:" % len(deleted))
+             for f in deleted:
+-                print f
++                print(f)
+         else:
+             return (source,
+                     [(a, source_man[a]) for a in addedFiles],
+@@ -245,13 +245,13 @@ class ManifestManager(object):
+             for f in modified:
+                 total_size += int(f[1][1])
+         if total_size != 0:
+-            print "****************************************************************"
+-            print "%d new file(s) to be downloaded from source" % len(added_files)
+-            print "%d existing file(s) to be updated" % len(modified)
+-            print "%s will need to be downloaded" % self.formatData(total_size)
++            print("****************************************************************")
++            print("%d new file(s) to be downloaded from source" % len(added_files))
++            print("%d existing file(s) to be updated" % len(modified))
++            print("%s will need to be downloaded" % self.formatData(total_size))
+             do_down = self.promptUserDownload()
+             if not do_down:
+-                print "Download aborted"
++                print("Download aborted")
+ 
+         update_manifest = False
+         if do_down:
+@@ -262,13 +262,13 @@ class ManifestManager(object):
+                     self.makeSurePathExists(full_path)
+             for add in added_files:
+                 full_path = os.path.abspath(os.path.join(localManifestLocation, add[0]))
+-                urllib.urlretrieve(source+add[0], full_path)
++                urllib.request.urlretrieve(source+add[0], full_path)
+             for modify in modified:
+                 full_path = os.path.abspath(os.path.join(localManifestLocation, modify[0]))
+-                urllib.urlretrieve(source+modify[0], full_path)
++                urllib.request.urlretrieve(source+modify[0], full_path)
+ 
+             if update_manifest:
+-                print "(re) creating manifest file (please be patient)"
++                print("(re) creating manifest file (please be patient)")
+                 self.createManifest(localManifestLocation, manifestName=localManifestName)
+ 
+         return True
+@@ -303,19 +303,19 @@ class ManifestManager(object):
+         input_not_ok = True
+         minimal=False
+         valid_responses = {'Y':True,'N':False}
+-        vrs = ",".join([x.lower() for x in valid_responses.keys()])
++        vrs = ",".join([x.lower() for x in list(valid_responses.keys())])
+         while(input_not_ok):
+             if(minimal):
+-                option = raw_input("Download? ("+vrs+") : ").upper()
++                option = input("Download? ("+vrs+") : ").upper()
+             else:
+-                option = raw_input("Confirm you want to download this data\n" \
++                option = input("Confirm you want to download this data\n" \
+                                    "Changes *WILL* be permanent\n" \
+                                    "Continue? ("+vrs+") : ").upper()
+             if(option in valid_responses):
+-                print "****************************************************************"
++                print("****************************************************************")
+                 return valid_responses[option]
+             else:
+-                print "ERROR: unrecognised choice '"+option+"'"
++                print("ERROR: unrecognised choice '"+option+"'")
+                 minimal = True
+ 
+     def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
+--- checkm/taxonParser.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/taxonParser.py
+@@ -73,8 +73,8 @@ class TaxonParser():
+             numMarkers, numMarkerSets = markerSet.size()
+             pTable.add_row([rank, taxon, markerSet.numGenomes, numMarkers, numMarkerSets])
+ 
+-        print ''
+-        print pTable.get_string()
++        print('')
++        print(pTable.get_string())
+ 
+     def markerSet(self, rank, taxon, markerFile):
+         """Obtain specified taxonomic-specific marker set."""
+--- checkm/uniqueMarkers.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/uniqueMarkers.py
+@@ -51,7 +51,7 @@ def getOppositeRankSpecificTaxonId(cursor, *args):
+         query.append(' %s != \'%s\' ' % (ranks[len(args) - 1], args[-1]))
+     query.append(' %s IS NULL' % ranks[len(args)])
+     query_string = 'AND'.join(query)
+-    print query_string
++    print(query_string)
+     result = cursor.execute('SELECT Id, "Count" FROM taxons WHERE %s' % query_string)
+     return result.fetchall()
+ 
+@@ -121,7 +121,7 @@ def doWork(args):
+             markers_from_others[Id] += count
+ 
+     descriptive_markers = []
+-    for marker_id, _ in marker_in_taxon_mapping.items():
++    for marker_id, _ in list(marker_in_taxon_mapping.items()):
+         if marker_id in markers_from_others:
+             fraction_in_others = float(markers_from_others[marker_id]) / float(others_total_count)
+             if fraction_in_others <= args.exclude:
+@@ -135,7 +135,7 @@ def doWork(args):
+         des_markers.append(getDescriptiveMarkers(cur, i))
+ 
+     for des_acc, des_name in des_markers:
+-        print des_acc, des_name
++        print(des_acc, des_name)
+ 
+ if __name__ == '__main__':
+ 
+--- checkm/util/img.py.orig	2022-03-15 18:25:01 UTC
++++ checkm/util/img.py
+@@ -195,7 +195,7 @@ class IMG(object):
+         genomeIdsOfInterest = set()
+         for genomeId in metadata:
+             bKeep = True
+-            for r in xrange(0, len(searchTaxa)):
++            for r in range(0, len(searchTaxa)):
+                 if taxonStr == 'universal':
+                     bKeep = True
+                 elif taxonStr == 'prokaryotes' and (metadata[genomeId]['taxonomy'][0] == 'Bacteria' or metadata[genomeId]['taxonomy'][0] == 'Archaea'):
+@@ -222,8 +222,8 @@
+ 
+     def lineageStats(self, metadata, mostSpecificRank):
+         stats = {}
+-        for r in xrange(0, mostSpecificRank + 1):
+-            for _, data in metadata.iteritems():
++        for r in range(0, mostSpecificRank + 1):
++            for _, data in metadata.items():
+                 taxaStr = ';'.join(data['taxonomy'][0:r + 1])
+                 stats[taxaStr] = stats.get(taxaStr, 0) + 1
+ 
+@@ -231,9 +231,9 @@
+ 
+     def lineagesSorted(self, metadata, mostSpecificRank=6):
+         lineages = []
+-        for r in xrange(0, mostSpecificRank + 1):
++        for r in range(0, mostSpecificRank + 1):
+             taxa = set()
+-            for _, data in metadata.iteritems():
++            for _, data in metadata.items():
+                 if 'unclassified' not in data['taxonomy'][0:r + 1]:
+                     taxa.add(';'.join(data['taxonomy'][0:r + 1]))
+ 
+@@ -274,7 +274,7 @@
+                     geneIdToFamilyIds[geneId].add(clusterId)
+                     count[clusterId] = count.get(clusterId, 0) + 1
+ 
+-            for clusterId, c in count.iteritems():
++            for clusterId, c in count.items():
+                 if clusterId not in table:
+                     table[clusterId] = {}
+                 table[clusterId][genomeId] = c
+@@ -288,7 +288,7 @@
+ 
+     def filterGeneCountTable(self, genomeIds, table, ubiquityThreshold=0.9, singleCopyThreshold=0.9):
+         idsToFilter = []
+-        for pfamId, genomeCounts in table.iteritems():
++        for pfamId, genomeCounts in table.items():
+             ubiquity = 0
+             singleCopy = 0
+             for genomeId in genomeIds:
+@@ -342,7 +342,7 @@
+             # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
+             # with GFF entries are considered.
+             familyIdToScaffoldIds = {}
+-            for pfamId, geneIds in pfamIdToGeneIds.iteritems():
++            for pfamId, geneIds in pfamIdToGeneIds.items():
+                 scaffolds = []
+                 for geneId in geneIds:
+                     scaffold = genePosition.get(geneId, None)
+@@ -352,7 +352,7 @@
+                 if scaffolds:
+                     familyIdToScaffoldIds[pfamId] = scaffolds
+ 
+-            for tigrId, geneIds in tigrIdToGeneIds.iteritems():
++            for tigrId, geneIds in tigrIdToGeneIds.items():
+                 scaffolds = []
+                 for geneId in geneIds:
+                     scaffold = genePosition.get(geneId, None)
+@@ -362,9 +362,9 @@
+                 if scaffold:
+                     familyIdToScaffoldIds[tigrId] = scaffolds
+         except:
+-            print '[BUG]: __genomeIdToClusterScaffold'
+-            print sys.exc_info()[0]
+-            print genomeId, geneId, tigrId, pfamId
++            print('[BUG]: __genomeIdToClusterScaffold')
++            print(sys.exc_info()[0])
++            print(genomeId, geneId, tigrId, pfamId)
+             sys.exit()
+ 
+         return familyIdToScaffoldIds
+@@ -400,7 +400,7 @@
+         seqs = readFasta(genomeFile)
+ 
+         seqLens = {}
+-        for seqId, seq in seqs.iteritems():
++        for seqId, seq in seqs.items():
+             seqLens[seqId] = len(seq)
+ 
+         return seqLens
+@@ -462,7 +462,7 @@
+             # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs
+             # with GFF entries are considered.
+             familyIdToGenomePositions = {}
+-            for pfamId, geneIds in pfamIdToGeneIds.iteritems():
++            for pfamId, geneIds in pfamIdToGeneIds.items():
+                 positions = []
+                 for geneId in geneIds:
+                     position = genePosition.get(geneId, None)
+@@ -472,7 +472,7 @@
+                 if positions:
+                     familyIdToGenomePositions[pfamId] = positions
+ 
+-            for tigrId, geneIds in tigrIdToGeneIds.iteritems():
++            for tigrId, geneIds in tigrIdToGeneIds.items():
+                 positions = []
+                 for geneId in geneIds:
+                     position = genePosition.get(geneId, None)
+@@ -482,9 +482,9 @@
+                 if positions:
+                     familyIdToGenomePositions[tigrId] = positions
+         except:
+-            print '[BUG]: __genomeFamilyPositions'
+-            print sys.exc_info()[0]
+-            print genomeId, geneId, tigrId, pfamId
++            print('[BUG]: __genomeFamilyPositions')
++            print(sys.exc_info()[0])
++            print(genomeId, geneId, tigrId, pfamId)
+             sys.exit()
+ 
+         return familyIdToGenomePositions
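
The patch above is a hand-rolled 2to3 conversion. For readers applying similar fixes to other ports, the recurring substitutions are collected in this small stand-alone sketch; the imports and idioms mirror the patch, while the dictionary, names, and URL are invented for illustration:

    # Python 3 replacements for the Python 2 idioms removed above.
    import urllib.request, urllib.error    # was: import urllib2 / import urllib

    seqs = {'contig1': 'ACGT', 'contig2': 'GGCCAT'}    # hypothetical input data

    for seqId, seq in seqs.items():                    # was: seqs.iteritems()
        print('%s: %d bp' % (seqId, len(seq)))         # was: print '%s: %d bp' % ...

    seqIds = list(seqs.keys())    # keys() is a view in Python 3; wrap in list() before indexing
    for i in range(len(seqIds)):  # was: xrange(...)
        pass

    try:
        urllib.request.urlopen('http://example.invalid/', None, 5)  # was: urllib2.urlopen(...)
    except urllib.error.URLError as e:                              # was: except urllib2.URLError, e
        print(e)                                                    # was: print e

raw_input() likewise becomes input(), and an indexed map()/lambda result is rewritten as a list comprehension, as in coverage.py above.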