git: 350d96226d1b - main - biology/checkm: Update 1.0.18 -> 1.1.8
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 12 Apr 2022 03:22:32 UTC
The branch main has been updated by yuri: URL: https://cgit.FreeBSD.org/ports/commit/?id=350d96226d1bcf56747c37a3ba7e33564fc42f69 commit 350d96226d1bcf56747c37a3ba7e33564fc42f69 Author: Yuri Victorovich <yuri@FreeBSD.org> AuthorDate: 2022-04-11 19:53:21 +0000 Commit: Yuri Victorovich <yuri@FreeBSD.org> CommitDate: 2022-04-12 03:22:30 +0000 biology/checkm: Update 1.0.18 -> 1.1.8 Reported by: portscout --- biology/checkm/Makefile | 10 +- biology/checkm/distinfo | 6 +- biology/checkm/files/patch-2to3 | 648 ---------------------------------------- 3 files changed, 8 insertions(+), 656 deletions(-) diff --git a/biology/checkm/Makefile b/biology/checkm/Makefile index 6bb02b8266de..4c93a679cef4 100644 --- a/biology/checkm/Makefile +++ b/biology/checkm/Makefile @@ -1,6 +1,6 @@ PORTNAME= CheckM DISTVERSIONPREFIX= v -DISTVERSION= 1.0.18 +DISTVERSION= 1.1.8 CATEGORIES= biology python MAINTAINER= yuri@FreeBSD.org @@ -10,10 +10,10 @@ LICENSE= GPLv3 LICENSE_FILE= ${WRKSRC}/LICENSE RUN_DEPENDS= ${PYNUMPY} \ - ${PYTHON_PKGNAMEPREFIX}DendroPy>=4.0.0:science/py-DendroPy@${PY_FLAVOR} \ - ${PYTHON_PKGNAMEPREFIX}matplotlib>=1.3.1:math/py-matplotlib@${PY_FLAVOR} \ - ${PYTHON_PKGNAMEPREFIX}pysam>=0.8.3:biology/py-pysam@${PY_FLAVOR} \ - ${PYTHON_PKGNAMEPREFIX}scipy>=0.9.0:science/py-scipy@${PY_FLAVOR} + ${PYTHON_PKGNAMEPREFIX}DendroPy>=4.4.0:science/py-DendroPy@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}matplotlib>=2.1.0:math/py-matplotlib@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pysam>=0.12.0.1:biology/py-pysam@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}scipy>=0.19.1:science/py-scipy@${PY_FLAVOR} USES= dos2unix python:3.7+ USE_GITHUB= yes diff --git a/biology/checkm/distinfo b/biology/checkm/distinfo index 35db37fcbe81..b707e945959f 100644 --- a/biology/checkm/distinfo +++ b/biology/checkm/distinfo @@ -1,3 +1,3 @@ -TIMESTAMP = 1566202999 -SHA256 (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 240184bd7c708cd041d0fc14f81b22af5cb69cb96ae75177aee32effa578ca4e -SIZE (Ecogenomics-CheckM-v1.0.18_GH0.tar.gz) = 212064 +TIMESTAMP = 1649697676 +SHA256 (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = c6e9d007622808ae3312de73d54866292a83857837119380a036036e799c1f38 +SIZE (Ecogenomics-CheckM-v1.1.8_GH0.tar.gz) = 1016432 diff --git a/biology/checkm/files/patch-2to3 b/biology/checkm/files/patch-2to3 deleted file mode 100644 index 04b37972c86e..000000000000 --- a/biology/checkm/files/patch-2to3 +++ /dev/null @@ -1,648 +0,0 @@ ---- checkm/binTools.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/binTools.py -@@ -26,7 +26,7 @@ import gzip - - import numpy as np - --from common import binIdFromFilename, checkFileExists, readDistribution, findNearest -+from .common import binIdFromFilename, checkFileExists, readDistribution, findNearest - from checkm.util.seqUtils import readFasta, writeFasta, baseCount - from checkm.genomicSignatures import GenomicSignatures - from checkm.prodigal import ProdigalGeneFeatureParser -@@ -123,34 +123,34 @@ class BinTools(): - seqId = line[1:].split(None, 1)[0] - - if seqId in seqIds: -- print ' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId) -+ print(' [Warning] Sequence %s found multiple times in bin %s.' % (seqId, binId)) - seqIds.add(seqId) - - binSeqs[binId] = seqIds - - # check for sequences assigned to multiple bins - bDuplicates = False -- binIds = binSeqs.keys() -- for i in xrange(0, len(binIds)): -- for j in xrange(i + 1, len(binIds)): -+ binIds = list(binSeqs.keys()) -+ for i in range(0, len(binIds)): -+ for j in range(i + 1, len(binIds)): - seqInter = set(binSeqs[binIds[i]]).intersection(set(binSeqs[binIds[j]])) - - if len(seqInter) > 0: - bDuplicates = True -- print ' Sequences shared between %s and %s: ' % (binIds[i], binIds[j]) -+ print(' Sequences shared between %s and %s: ' % (binIds[i], binIds[j])) - for seqId in seqInter: -- print ' ' + seqId -- print '' -+ print(' ' + seqId) -+ print('') - - if not bDuplicates: -- print ' No sequences assigned to multiple bins.' -+ print(' No sequences assigned to multiple bins.') - - def gcDist(self, seqs): - """GC statistics for bin.""" - GCs = [] - gcTotal = 0 - basesTotal = 0 -- for _, seq in seqs.iteritems(): -+ for _, seq in seqs.items(): - a, c, g, t = baseCount(seq) - gc = g + c - bases = a + c + g + t -@@ -171,7 +171,7 @@ class BinTools(): - - codingBasesTotal = 0 - basesTotal = 0 -- for seqId, seq in seqs.iteritems(): -+ for seqId, seq in seqs.items(): - codingBases = prodigalParser.codingBases(seqId) - - CDs.append(float(codingBases) / len(seq)) -@@ -186,11 +186,11 @@ class BinTools(): - def binTetraSig(self, seqs, tetraSigs): - """Tetranucleotide signature for bin. """ - binSize = 0 -- for _, seq in seqs.iteritems(): -+ for _, seq in seqs.items(): - binSize += len(seq) - - bInit = True -- for seqId, seq in seqs.iteritems(): -+ for seqId, seq in seqs.items(): - weightedTetraSig = tetraSigs[seqId] * (float(len(seq)) / binSize) - if bInit: - binSig = weightedTetraSig -@@ -247,32 +247,32 @@ class BinTools(): - meanCD, deltaCDs, CDs = self.codingDensityDist(seqs, prodigalParser) - - # find keys into GC and CD distributions -- closestGC = findNearest(np.array(gcBounds.keys()), meanGC) -- sampleSeqLen = gcBounds[closestGC].keys()[0] -+ closestGC = findNearest(np.array(list(gcBounds.keys())), meanGC) -+ sampleSeqLen = list(gcBounds[closestGC].keys())[0] - d = gcBounds[closestGC][sampleSeqLen] -- gcLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0) -- gcUpperBoundKey = findNearest(d.keys(), (100 + distribution) / 2.0) -+ gcLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0) -+ gcUpperBoundKey = findNearest(list(d.keys()), (100 + distribution) / 2.0) - -- closestCD = findNearest(np.array(cdBounds.keys()), meanCD) -- sampleSeqLen = cdBounds[closestCD].keys()[0] -+ closestCD = findNearest(np.array(list(cdBounds.keys())), meanCD) -+ sampleSeqLen = list(cdBounds[closestCD].keys())[0] - d = cdBounds[closestCD][sampleSeqLen] -- cdLowerBoundKey = findNearest(d.keys(), (100 - distribution) / 2.0) -+ cdLowerBoundKey = findNearest(list(d.keys()), (100 - distribution) / 2.0) - -- tdBoundKey = findNearest(tdBounds[tdBounds.keys()[0]].keys(), distribution) -+ tdBoundKey = findNearest(list(tdBounds[list(tdBounds.keys())[0]].keys()), distribution) - - index = 0 -- for seqId, seq in seqs.iteritems(): -+ for seqId, seq in seqs.items(): - seqLen = len(seq) - - # find GC, CD, and TD bounds -- closestSeqLen = findNearest(gcBounds[closestGC].keys(), seqLen) -+ closestSeqLen = findNearest(list(gcBounds[closestGC].keys()), seqLen) - gcLowerBound = gcBounds[closestGC][closestSeqLen][gcLowerBoundKey] - gcUpperBound = gcBounds[closestGC][closestSeqLen][gcUpperBoundKey] - -- closestSeqLen = findNearest(cdBounds[closestCD].keys(), seqLen) -+ closestSeqLen = findNearest(list(cdBounds[closestCD].keys()), seqLen) - cdLowerBound = cdBounds[closestCD][closestSeqLen][cdLowerBoundKey] - -- closestSeqLen = findNearest(tdBounds.keys(), seqLen) -+ closestSeqLen = findNearest(list(tdBounds.keys()), seqLen) - tdBound = tdBounds[closestSeqLen][tdBoundKey] - - outlyingDists = [] ---- checkm/checkmData.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/checkmData.py -@@ -85,11 +85,11 @@ class DBConfig(object): - """Work out if we have permission to write to the CheckM config before attempting to make changes""" - try: - open(self.configFile, 'a') -- except IOError, e: -- print "You do not seem to have permission to edit the checkm config file" -- print "located at %s" % self.configFile -- print "Please try again with updated privileges. Error was:\n" -- print e -+ except IOError as e: -+ print("You do not seem to have permission to edit the checkm config file") -+ print("located at %s" % self.configFile) -+ print("Please try again with updated privileges. Error was:\n") -+ print(e) - return False - return True - -@@ -167,28 +167,28 @@ class DBManager(mm.ManifestManager): - else: - path = os.path.abspath(os.path.expanduser(path)) - -- print "" -+ print("") - if os.path.exists(path): - # path exists - if os.access(path, os.W_OK): - # path is writable - path_set = True -- print "Path [%s] exists and you have permission to write to this folder." % path -+ print("Path [%s] exists and you have permission to write to this folder." % path) - else: -- print "Path [%s] exists but you do not have permission to write to this folder." % path -+ print("Path [%s] exists but you do not have permission to write to this folder." % path) - else: - # path does not exist, try to make it - "Path [%s] does not exist so I will attempt to create it" % path - try: - self.makeSurePathExists(path) -- print "Path [%s] has been created and you have permission to write to this folder." % path -+ print("Path [%s] has been created and you have permission to write to this folder." % path) - path_set = True - except Exception: -- print "Unable to make the folder, Error was: %s" % sys.exc_info()[0] -+ print("Unable to make the folder, Error was: %s" % sys.exc_info()[0]) - minimal = True - - # (re)make the manifest file -- print "(re) creating manifest file (please be patient)." -+ print("(re) creating manifest file (please be patient).") - self.createManifest(path, self.config.values["localManifestName"]) - - return path -@@ -196,8 +196,8 @@ class DBManager(mm.ManifestManager): - def checkPermissions(self): - """See if the user has permission to write to the data directory""" - if not os.access(self.config.values["dataRoot"], os.W_OK): -- print "You do not seem to have permission to edit the CheckM data folder" -- print "located at %s" % self.config.values["dataRoot"] -+ print("You do not seem to have permission to edit the CheckM data folder") -+ print("located at %s" % self.config.values["dataRoot"]) - return False - - return True ---- checkm/coverage.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/coverage.py -@@ -62,7 +62,7 @@ class Coverage(): - binId = binIdFromFilename(binFile) - - seqs = readFasta(binFile) -- for seqId, seq in seqs.iteritems(): -+ for seqId, seq in seqs.items(): - seqIdToBinId[seqId] = binId - seqIdToSeqLen[seqId] = len(seq) - -@@ -97,12 +97,12 @@ class Coverage(): - print(header) - - # get length of all seqs -- for bamFile, seqIds in coverageInfo.iteritems(): -- for seqId in seqIds.keys(): -+ for bamFile, seqIds in coverageInfo.items(): -+ for seqId in list(seqIds.keys()): - seqIdToSeqLen[seqId] = seqIds[seqId].seqLen - - # write coverage stats for all scaffolds to file -- for seqId, seqLen in seqIdToSeqLen.iteritems(): -+ for seqId, seqLen in seqIdToSeqLen.items(): - rowStr = seqId + '\t' + seqIdToBinId.get(seqId, DefaultValues.UNBINNED) + '\t' + str(seqLen) - for bamFile in bamFiles: - bamId = binIdFromFilename(bamFile) -@@ -171,7 +171,7 @@ class Coverage(): - writeProc.join() - except: - # make sure all processes are terminated -- print traceback.format_exc() -+ print(traceback.format_exc()) - for p in workerProc: - p.terminate() - -@@ -271,16 +271,16 @@ class Coverage(): - if self.logger.getEffectiveLevel() <= logging.INFO: - sys.stderr.write('\n') - -- print '' -- print ' # total reads: %d' % totalReads -- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads) -- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads) -- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads) -- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads) -- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads) -- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads) -- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads) -- print '' -+ print('') -+ print(' # total reads: %d' % totalReads) -+ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)) -+ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)) -+ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)) -+ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)) -+ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)) -+ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)) -+ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)) -+ print('') - - def parseCoverage(self, coverageFile): - """Read coverage information from file.""" -@@ -301,7 +301,7 @@ class Coverage(): - if seqId not in coverageStats[binId]: - coverageStats[binId][seqId] = {} - -- for i in xrange(3, len(lineSplit), 3): -+ for i in range(3, len(lineSplit), 3): - bamId = lineSplit[i] - coverage = float(lineSplit[i + 1]) - coverageStats[binId][seqId][bamId] = coverage -@@ -325,7 +325,7 @@ class Coverage(): - - # calculate mean coverage (weighted by scaffold length) - # for each bin under each BAM file -- for i in xrange(3, len(lineSplit), 3): -+ for i in range(3, len(lineSplit), 3): - bamId = lineSplit[i] - coverage = float(lineSplit[i + 1]) - binCoverages[binId][bamId].append(coverage) -@@ -341,13 +341,13 @@ class Coverage(): - - profiles = defaultdict(dict) - for binId in binStats: -- for bamId, stats in binStats[binId].iteritems(): -+ for bamId, stats in binStats[binId].items(): - binLength, meanBinCoverage = stats - coverages = binCoverages[binId][bamId] - - varCoverage = 0 - if len(coverages) > 1: -- varCoverage = mean(map(lambda x: (x - meanBinCoverage) ** 2, coverages)) -+ varCoverage = mean([(x - meanBinCoverage) ** 2 for x in coverages]) - - profiles[binId][bamId] = [meanBinCoverage, sqrt(varCoverage)] - ---- checkm/coverageWindows.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/coverageWindows.py -@@ -188,10 +188,10 @@ class CoverageWindows(): - try: - end += windowSize - except: -- print '*****************' -- print end -- print windowSize -- print '******************' -+ print('*****************') -+ print(end) -+ print(windowSize) -+ print('******************') - - coverage = float(sum(readLoader.coverage)) / seqLen - -@@ -239,13 +239,13 @@ class CoverageWindows(): - if self.logger.getEffectiveLevel() <= logging.INFO: - sys.stderr.write('\n') - -- print '' -- print ' # total reads: %d' % totalReads -- print ' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads) -- print ' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads) -- print ' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads) -- print ' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads) -- print ' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads) -- print ' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads) -- print ' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads) -- print '' -+ print('') -+ print(' # total reads: %d' % totalReads) -+ print(' # properly mapped reads: %d (%.1f%%)' % (totalMappedReads, float(totalMappedReads) * 100 / totalReads)) -+ print(' # duplicate reads: %d (%.1f%%)' % (totalDuplicates, float(totalDuplicates) * 100 / totalReads)) -+ print(' # secondary reads: %d (%.1f%%)' % (totalSecondary, float(totalSecondary) * 100 / totalReads)) -+ print(' # reads failing QC: %d (%.1f%%)' % (totalFailedQC, float(totalFailedQC) * 100 / totalReads)) -+ print(' # reads failing alignment length: %d (%.1f%%)' % (totalFailedAlignLen, float(totalFailedAlignLen) * 100 / totalReads)) -+ print(' # reads failing edit distance: %d (%.1f%%)' % (totalFailedEditDist, float(totalFailedEditDist) * 100 / totalReads)) -+ print(' # reads not properly paired: %d (%.1f%%)' % (totalFailedProperPair, float(totalFailedProperPair) * 100 / totalReads)) -+ print('') ---- checkm/manifestManager.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/manifestManager.py -@@ -47,8 +47,8 @@ __MANIFEST__ = ".dmanifest" - # system includes - import os - import hashlib --import urllib2 --import urllib -+import urllib.request, urllib.error, urllib.parse -+import urllib.request, urllib.parse, urllib.error - import shutil - import errno - -@@ -121,15 +121,15 @@ class ManifestManager(object): - source = "" - # first we assume it is remote - try: -- s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout) -+ s_man = urllib.request.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout) - source = sourceManifestLocation + "/" - except ValueError: - # then it is probably a file - s_man = open(os.path.join(sourceManifestLocation, sourceManifestName)) - source = os.path.join(sourceManifestLocation) + os.path.sep -- except urllib2.URLError: -+ except urllib.error.URLError: - # problems connecting to server, perhaps user is behind a proxy or firewall -- print "Error: failed to connect to server." -+ print("Error: failed to connect to server.") - return (None, None, None, None, None) - - first_line = True -@@ -140,11 +140,11 @@ class ManifestManager(object): - # get the type of the manifest - s_type = self.getManType(line) - if s_type != l_type: -- print "Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type) -+ print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type)) - return (None, None, None, None, None) - else: - # no type specified -- print "Error: type of source manifest is not specified. Is this a valid manifest file?" -+ print("Error: type of source manifest is not specified. Is this a valid manifest file?") - return (None, None, None, None, None) - - self.type = l_type -@@ -174,7 +174,7 @@ class ManifestManager(object): - deleted.append(fields[0]) - - # check for new files -- for f in source_man.keys(): -+ for f in list(source_man.keys()): - if source_man[f][2] == False: - if source_man[f][0] == '-': - addedDirs.append(f) -@@ -190,28 +190,28 @@ class ManifestManager(object): - modified_size += int(source_man[f][1]) - - if len(addedFiles) > 0: -- print "#------------------------------------------------------" -- print "# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)) -+ print("#------------------------------------------------------") -+ print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size))) - for f in addedFiles: -- print "\t".join([self.formatData(int(source_man[f][1])), f]) -+ print("\t".join([self.formatData(int(source_man[f][1])), f])) - - if len(addedDirs) > 0: -- print "#------------------------------------------------------" -- print "# Source contains %d new folders(s)" % (len(addedDirs)) -+ print("#------------------------------------------------------") -+ print("# Source contains %d new folders(s)" % (len(addedDirs))) - for f in addedDirs: -- print f -+ print(f) - - if len(modified) > 0: -- print "#------------------------------------------------------" -- print "# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)) -+ print("#------------------------------------------------------") -+ print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size))) - for f in modified: -- print f -+ print(f) - - if len(deleted) > 0: -- print "#------------------------------------------------------" -- print "# %d files have been deleted in the source:" % len(deleted) -+ print("#------------------------------------------------------") -+ print("# %d files have been deleted in the source:" % len(deleted)) - for f in deleted: -- print f -+ print(f) - else: - return (source, - [(a, source_man[a]) for a in addedFiles], -@@ -245,13 +245,13 @@ class ManifestManager(object): - for f in modified: - total_size += int(f[1][1]) - if total_size != 0: -- print "****************************************************************" -- print "%d new file(s) to be downloaded from source" % len(added_files) -- print "%d existing file(s) to be updated" % len(modified) -- print "%s will need to be downloaded" % self.formatData(total_size) -+ print("****************************************************************") -+ print("%d new file(s) to be downloaded from source" % len(added_files)) -+ print("%d existing file(s) to be updated" % len(modified)) -+ print("%s will need to be downloaded" % self.formatData(total_size)) - do_down = self.promptUserDownload() - if not do_down: -- print "Download aborted" -+ print("Download aborted") - - update_manifest = False - if do_down: -@@ -262,13 +262,13 @@ class ManifestManager(object): - self.makeSurePathExists(full_path) - for add in added_files: - full_path = os.path.abspath(os.path.join(localManifestLocation, add[0])) -- urllib.urlretrieve(source+add[0], full_path) -+ urllib.request.urlretrieve(source+add[0], full_path) - for modify in modified: - full_path = os.path.abspath(os.path.join(localManifestLocation, modify[0])) -- urllib.urlretrieve(source+modify[0], full_path) -+ urllib.request.urlretrieve(source+modify[0], full_path) - - if update_manifest: -- print "(re) creating manifest file (please be patient)" -+ print("(re) creating manifest file (please be patient)") - self.createManifest(localManifestLocation, manifestName=localManifestName) - - return True -@@ -303,19 +303,19 @@ class ManifestManager(object): - input_not_ok = True - minimal=False - valid_responses = {'Y':True,'N':False} -- vrs = ",".join([x.lower() for x in valid_responses.keys()]) -+ vrs = ",".join([x.lower() for x in list(valid_responses.keys())]) - while(input_not_ok): - if(minimal): -- option = raw_input("Download? ("+vrs+") : ").upper() -+ option = input("Download? ("+vrs+") : ").upper() - else: -- option = raw_input("Confirm you want to download this data\n" \ -+ option = input("Confirm you want to download this data\n" \ - "Changes *WILL* be permanent\n" \ - "Continue? ("+vrs+") : ").upper() - if(option in valid_responses): -- print "****************************************************************" -+ print("****************************************************************") - return valid_responses[option] - else: -- print "ERROR: unrecognised choice '"+option+"'" -+ print("ERROR: unrecognised choice '"+option+"'") - minimal = True - - def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__): ---- checkm/taxonParser.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/taxonParser.py -@@ -73,8 +73,8 @@ class TaxonParser(): - numMarkers, numMarkerSets = markerSet.size() - pTable.add_row([rank, taxon, markerSet.numGenomes, numMarkers, numMarkerSets]) - -- print '' -- print pTable.get_string() -+ print('') -+ print(pTable.get_string()) - - def markerSet(self, rank, taxon, markerFile): - """Obtain specified taxonomic-specific marker set.""" ---- checkm/uniqueMarkers.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/uniqueMarkers.py -@@ -51,7 +51,7 @@ def getOppositeRankSpecificTaxonId(cursor, *args): - query.append(' %s != \'%s\' ' % (ranks[len(args) - 1], args[-1])) - query.append(' %s IS NULL' % ranks[len(args)]) - query_string = 'AND'.join(query) -- print query_string -+ print(query_string) - result = cursor.execute('SELECT Id, "Count" FROM taxons WHERE %s' % query_string) - return result.fetchall() - -@@ -121,7 +121,7 @@ def doWork(args): - markers_from_others[Id] += count - - descriptive_markers = [] -- for marker_id, _ in marker_in_taxon_mapping.items(): -+ for marker_id, _ in list(marker_in_taxon_mapping.items()): - if marker_id in markers_from_others: - fraction_in_others = float(markers_from_others[marker_id]) / float(others_total_count) - if fraction_in_others <= args.exclude: -@@ -135,7 +135,7 @@ def doWork(args): - des_markers.append(getDescriptiveMarkers(cur, i)) - - for des_acc, des_name in des_markers: -- print des_acc, des_name -+ print(des_acc, des_name) - - if __name__ == '__main__': - ---- checkm/util/img.py.orig 2022-03-15 18:25:01 UTC -+++ checkm/util/img.py -@@ -195,7 +195,7 @@ class IMG(object): - genomeIdsOfInterest = set() - for genomeId in metadata: - bKeep = True -- for r in xrange(0, len(searchTaxa)): -+ for r in range(0, len(searchTaxa)): - if taxonStr == 'universal': - bKeep = True - elif taxonStr == 'prokaryotes' and (metadata[genomeId]['taxonomy'][0] == 'Bacteria' or metadata[genomeId]['taxonomy'][0] == 'Archaea'): -@@ -222,8 +222,8 @@ class IMG(object): - - def lineageStats(self, metadata, mostSpecificRank): - stats = {} -- for r in xrange(0, mostSpecificRank + 1): -- for _, data in metadata.iteritems(): -+ for r in range(0, mostSpecificRank + 1): -+ for _, data in metadata.items(): - taxaStr = ';'.join(data['taxonomy'][0:r + 1]) - stats[taxaStr] = stats.get(taxaStr, 0) + 1 - -@@ -231,9 +231,9 @@ class IMG(object): - - def lineagesSorted(self, metadata, mostSpecificRank=6): - lineages = [] -- for r in xrange(0, mostSpecificRank + 1): -+ for r in range(0, mostSpecificRank + 1): - taxa = set() -- for _, data in metadata.iteritems(): -+ for _, data in metadata.items(): - if 'unclassified' not in data['taxonomy'][0:r + 1]: - taxa.add(';'.join(data['taxonomy'][0:r + 1])) - -@@ -274,7 +274,7 @@ class IMG(object): - geneIdToFamilyIds[geneId].add(clusterId) - count[clusterId] = count.get(clusterId, 0) + 1 - -- for clusterId, c in count.iteritems(): -+ for clusterId, c in count.items(): - if clusterId not in table: - table[clusterId] = {} - table[clusterId][genomeId] = c -@@ -288,7 +288,7 @@ class IMG(object): - - def filterGeneCountTable(self, genomeIds, table, ubiquityThreshold=0.9, singleCopyThreshold=0.9): - idsToFilter = [] -- for pfamId, genomeCounts in table.iteritems(): -+ for pfamId, genomeCounts in table.items(): - ubiquity = 0 - singleCopy = 0 - for genomeId in genomeIds: -@@ -342,7 +342,7 @@ class IMG(object): - # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs - # with GFF entries are considered. - familyIdToScaffoldIds = {} -- for pfamId, geneIds in pfamIdToGeneIds.iteritems(): -+ for pfamId, geneIds in pfamIdToGeneIds.items(): - scaffolds = [] - for geneId in geneIds: - scaffold = genePosition.get(geneId, None) -@@ -352,7 +352,7 @@ class IMG(object): - if scaffolds: - familyIdToScaffoldIds[pfamId] = scaffolds - -- for tigrId, geneIds in tigrIdToGeneIds.iteritems(): -+ for tigrId, geneIds in tigrIdToGeneIds.items(): - scaffolds = [] - for geneId in geneIds: - scaffold = genePosition.get(geneId, None) -@@ -362,9 +362,9 @@ class IMG(object): - if scaffold: - familyIdToScaffoldIds[tigrId] = scaffolds - except: -- print '[BUG]: __genomeIdToClusterScaffold' -- print sys.exc_info()[0] -- print genomeId, geneId, tigrId, pfamId -+ print('[BUG]: __genomeIdToClusterScaffold') -+ print(sys.exc_info()[0]) -+ print(genomeId, geneId, tigrId, pfamId) - sys.exit() - - return familyIdToScaffoldIds -@@ -400,7 +400,7 @@ class IMG(object): - seqs = readFasta(genomeFile) - - seqLens = {} -- for seqId, seq in seqs.iteritems(): -+ for seqId, seq in seqs.items(): - seqLens[seqId] = len(seq) - - return seqLens -@@ -462,7 +462,7 @@ class IMG(object): - # are a few cases where this isn't tree (?) so only PFAMs/TIGRFAMs - # with GFF entries are considered. - familyIdToGenomePositions = {} -- for pfamId, geneIds in pfamIdToGeneIds.iteritems(): -+ for pfamId, geneIds in pfamIdToGeneIds.items(): - positions = [] - for geneId in geneIds: - position = genePosition.get(geneId, None) -@@ -472,7 +472,7 @@ class IMG(object): - if positions: - familyIdToGenomePositions[pfamId] = positions - -- for tigrId, geneIds in tigrIdToGeneIds.iteritems(): -+ for tigrId, geneIds in tigrIdToGeneIds.items(): - positions = [] - for geneId in geneIds: - position = genePosition.get(geneId, None) -@@ -482,9 +482,9 @@ class IMG(object): - if positions: - familyIdToGenomePositions[tigrId] = positions - except: -- print '[BUG]: __genomeFamilyPositions' -- print sys.exc_info()[0] -- print genomeId, geneId, tigrId, pfamId -+ print('[BUG]: __genomeFamilyPositions') -+ print(sys.exc_info()[0]) -+ print(genomeId, geneId, tigrId, pfamId) - sys.exit() - - return familyIdToGenomePositions