diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java index 31b7188e..7d38bb1b 100644 --- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java +++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java @@ -350,12 +350,15 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o // Achievement B — two-pass precursor mass calibration (P2-cal). // Runs a sampled pre-pass over the current file's SpecKeys to learn - // a per-file ppm shift, then stores it on DBSearchIOFiles so every - // task-local ScoredSpectraMap picks it up. OFF mode is a strict - // no-op: we skip the pre-pass entirely and never call the setter, - // so DBSearchIOFiles.precursorMassShiftPpm stays at its 0.0 default - // and ScoredSpectraMap.applyShift() takes its exact-zero fast path. + // a per-file ppm shift and a robust residual spread estimate. The + // shift is stored on DBSearchIOFiles so every task-local + // ScoredSpectraMap picks it up. When the user tolerance is ppm-based + // and the residuals are reliable, we also tighten the effective + // precursor window for the main pass. OFF mode is a strict no-op: + // we skip the pre-pass entirely, never call the setter, and keep the + // original tolerance objects unchanged. DBSearchIOFiles currentIoFiles = params.getDBSearchIOList().get(ioIndex); + MassCalibrator.CalibrationStats calibrationStats = null; if (params.getPrecursorCalMode() != SearchParams.PrecursorCalMode.OFF) { long calStart = System.currentTimeMillis(); MassCalibrator calibrator = new MassCalibrator( @@ -366,22 +369,66 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o specKeyList, leftPrecursorMassTolerance, rightPrecursorMassTolerance, - minIsotopeError, - maxIsotopeError, specDataType); - double shiftPpm = calibrator.learnPrecursorShiftPpm(ioIndex); + calibrationStats = calibrator.learnCalibrationStats(ioIndex); + double shiftPpm = calibrationStats.getShiftPpm(); boolean applyLearnedShift = shiftPpm != 0.0 || params.getPrecursorCalMode() == SearchParams.PrecursorCalMode.ON; if (applyLearnedShift) { currentIoFiles.setPrecursorMassShiftPpm(shiftPpm); - System.out.printf("Precursor mass shift learned: %.3f ppm (elapsed: %.2f sec)%n", - shiftPpm, (System.currentTimeMillis() - calStart) / 1000.0); + } + if (calibrationStats != null && calibrationStats.hasReliableStats()) { + System.out.printf("Precursor mass shift learned: %.3f ppm from %d confident PSMs (robust sigma %.3f ppm; elapsed: %.2f sec)%n", + shiftPpm, + calibrationStats.getConfidentPsmCount(), + calibrationStats.getRobustSigmaPpm(), + (System.currentTimeMillis() - calStart) / 1000.0); } else { System.out.printf("Precursor mass calibration skipped (insufficient confident PSMs; elapsed: %.2f sec)%n", (System.currentTimeMillis() - calStart) / 1000.0); } } double precursorMassShiftPpm = currentIoFiles.getPrecursorMassShiftPpm(); + Tolerance resolvedLeftPrecursorMassTolerance = leftPrecursorMassTolerance; + Tolerance resolvedRightPrecursorMassTolerance = rightPrecursorMassTolerance; + if (calibrationStats != null + && calibrationStats.hasReliableStats() + && leftPrecursorMassTolerance.isTolerancePPM() + && rightPrecursorMassTolerance.isTolerancePPM()) { + // Tightening formula constants are configurable via system properties for + // falsification sweeps (e.g. -Dmsgfplus.tighteningSigmaMultiplier=2 to test + // whether a 2-sigma envelope buys real wall improvement on Astral). Defaults + // match MassCalibrator.DEFAULT_TIGHTENED_WINDOW_*. Production OFF-mode + // semantics are unchanged. + float sigmaMultiplier = Float.parseFloat(System.getProperty( + "msgfplus.tighteningSigmaMultiplier", + String.valueOf(MassCalibrator.DEFAULT_TIGHTENED_WINDOW_SIGMA_MULTIPLIER))); + float floorPpm = Float.parseFloat(System.getProperty( + "msgfplus.tighteningFloorPpm", + String.valueOf(MassCalibrator.DEFAULT_TIGHTENED_WINDOW_FLOOR_PPM))); + float marginPpm = Float.parseFloat(System.getProperty( + "msgfplus.tighteningMarginPpm", + String.valueOf(MassCalibrator.DEFAULT_TIGHTENED_WINDOW_MARGIN_PPM))); + float tightenedLeftPpm = MassCalibrator.tightenedTolerancePpm( + leftPrecursorMassTolerance.getValue(), + calibrationStats.getRobustSigmaPpm(), + sigmaMultiplier, floorPpm, marginPpm); + float tightenedRightPpm = MassCalibrator.tightenedTolerancePpm( + rightPrecursorMassTolerance.getValue(), + calibrationStats.getRobustSigmaPpm(), + sigmaMultiplier, floorPpm, marginPpm); + boolean tightened = tightenedLeftPpm < leftPrecursorMassTolerance.getValue() + || tightenedRightPpm < rightPrecursorMassTolerance.getValue(); + if (tightened) { + resolvedLeftPrecursorMassTolerance = new Tolerance(tightenedLeftPpm, true); + resolvedRightPrecursorMassTolerance = new Tolerance(tightenedRightPpm, true); + System.out.printf("Tightened precursor tolerance for main pass: left %.3f ppm -> %.3f ppm, right %.3f ppm -> %.3f ppm%n", + leftPrecursorMassTolerance.getValue(), tightenedLeftPpm, + rightPrecursorMassTolerance.getValue(), tightenedRightPpm); + } + } + final Tolerance effectiveLeftPrecursorMassTolerance = resolvedLeftPrecursorMassTolerance; + final Tolerance effectiveRightPrecursorMassTolerance = resolvedRightPrecursorMassTolerance; List resultList; @@ -468,8 +515,8 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o ScoredSpectraMap specScanner = new ScoredSpectraMap( specAcc, specKeyList.subList(taskStartIndex, taskEndIndex), - leftPrecursorMassTolerance, - rightPrecursorMassTolerance, + effectiveLeftPrecursorMassTolerance, + effectiveRightPrecursorMassTolerance, minIsotopeError, maxIsotopeError, specDataType, diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java b/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java index 4f94d64e..04e4ab1e 100644 --- a/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java +++ b/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java @@ -15,41 +15,41 @@ public class DBScanner { - private int minPeptideLength; - private int maxPeptideLength; - private int maxMissedCleavages; + protected int minPeptideLength; + protected int maxPeptideLength; + protected int maxMissedCleavages; /** * Number of isoforms to consider per peptide. * NUM_VARIANTS_PER_PEPTIDE is 128 in Constants.java */ - private int maxNumVariantsPerPeptide; + protected int maxNumVariantsPerPeptide; - private AminoAcidSet aaSet; + protected AminoAcidSet aaSet; private double[] aaMass; private int[] intAAMass; - private Enzyme enzyme; - private int numPeptidesPerSpec; + protected Enzyme enzyme; + protected int numPeptidesPerSpec; - private final CompactSuffixArray sa; - private final int size; + protected final CompactSuffixArray sa; + protected final int size; // to scan the database partially // Input spectra - private final ScoredSpectraMap specScanner; + protected final ScoredSpectraMap specScanner; - private int minDeNovoScore; - private boolean ignoreNTermMetCleavage; + protected int minDeNovoScore; + protected boolean ignoreNTermMetCleavage; // DB search results - private Map> specKeyDBMatchMap; - private Map> specIndexDBMatchMap; + protected Map> specKeyDBMatchMap; + protected Map> specIndexDBMatchMap; - private ProgressData progress; - private PrintStream output; + protected ProgressData progress; + protected PrintStream output; // For output - private String threadName = ""; + protected String threadName = ""; public DBScanner( ScoredSpectraMap specScanner, diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/MassCalibrator.java b/src/main/java/edu/ucsd/msjava/msdbsearch/MassCalibrator.java index 24817f94..8f8f6ebe 100644 --- a/src/main/java/edu/ucsd/msjava/msdbsearch/MassCalibrator.java +++ b/src/main/java/edu/ucsd/msjava/msdbsearch/MassCalibrator.java @@ -35,13 +35,35 @@ * immutably thereafter, so no synchronization is required. */ public class MassCalibrator { - - /** Sample every Nth SpecKey. Cap total sampled keys at {@link #MAX_SAMPLED}. */ + /** Conservative lower bound for a tightened ppm half-window. */ + public static final float DEFAULT_TIGHTENED_WINDOW_FLOOR_PPM = 2.0f; + /** Safety margin added after converting MAD to a Gaussian-equivalent sigma. */ + public static final float DEFAULT_TIGHTENED_WINDOW_MARGIN_PPM = 0.5f; + /** Number of robust sigmas to keep when tightening precursor windows. */ + public static final float DEFAULT_TIGHTENED_WINDOW_SIGMA_MULTIPLIER = 3.0f; + /** Gaussian-equivalent scale factor for MAD. */ + private static final double MAD_TO_SIGMA_SCALE = 1.4826; + /** + * Reject residuals whose magnitude exceeds this threshold. A genuine mass-accuracy + * residual on any modern instrument is well under 50 ppm; values above this almost + * always come from isotope-error matches (e.g. M+1 isotope at +1.003 Da on a 2 kDa + * peptide = ~500 ppm residual) admitted by a wide {@code -ti} window. Filtering + * before computing median + MAD prevents these outliers from contaminating the + * robust spread estimate. Empirically the residual distribution drops off well + * before this floor; isotope-shift contamination clusters near integer multiples + * of (1.003 / mass) ppm. + */ + static final double MAX_REASONABLE_RESIDUAL_PPM = 50.0; + /** Sample every Nth SpecKey. Cap total sampled keys at {@link #maxSampled}. */ private static final int SAMPLING_STRIDE = 10; - /** Hard upper bound on sampled spectra to keep the pre-pass bounded on large runs. */ - private static final int MAX_SAMPLED = 500; - /** Minimum PSMs required before the learned shift is considered reliable. */ - private static final int MIN_CONFIDENT_PSMS = 200; + /** Default upper bound on sampled spectra in the pre-pass. */ + public static final int DEFAULT_MAX_SAMPLED = 500; + /** Default minimum PSMs required before the learned shift is considered reliable. */ + public static final int DEFAULT_MIN_CONFIDENT_PSMS = 200; + /** System property to override {@link #DEFAULT_MAX_SAMPLED} at runtime. */ + public static final String MAX_SAMPLED_PROPERTY = "msgfplus.maxSampled"; + /** System property to override {@link #DEFAULT_MIN_CONFIDENT_PSMS} at runtime. */ + public static final String MIN_CONFIDENT_PSMS_PROPERTY = "msgfplus.minConfidentPsms"; /** SpecEValue threshold for "confident" pre-pass PSMs. Tight enough to exclude decoys. */ private static final double MAX_SPEC_EVALUE = 1e-6; /** @@ -63,9 +85,42 @@ public class MassCalibrator { private final List specKeyList; private final Tolerance leftPrecursorMassTolerance; private final Tolerance rightPrecursorMassTolerance; - private final int minIsotopeError; - private final int maxIsotopeError; private final SpecDataType specDataType; + /** Effective sampling cap; {@link #DEFAULT_MAX_SAMPLED} unless overridden via {@link #MAX_SAMPLED_PROPERTY}. */ + private final int maxSampled; + /** Effective stratification floor; {@link #DEFAULT_MIN_CONFIDENT_PSMS} unless overridden via {@link #MIN_CONFIDENT_PSMS_PROPERTY}. */ + private final int minConfidentPsms; + + /** Immutable summary of the sampled calibration residuals for one file. */ + public static final class CalibrationStats { + private final double shiftPpm; + private final double robustSigmaPpm; + private final int confidentPsmCount; + + public CalibrationStats(double shiftPpm, double robustSigmaPpm, int confidentPsmCount) { + this.shiftPpm = shiftPpm; + this.robustSigmaPpm = robustSigmaPpm; + this.confidentPsmCount = confidentPsmCount; + } + + public double getShiftPpm() { + return shiftPpm; + } + + public double getRobustSigmaPpm() { + return robustSigmaPpm; + } + + public int getConfidentPsmCount() { + return confidentPsmCount; + } + + public boolean hasReliableStats() { + // The calibrator emits confidentPsmCount > 0 only when residuals + // cleared the (configurable) minConfidentPsms threshold. + return confidentPsmCount > 0; + } + } /** * @param specAcc spectra accessor for the current file (already MS-level filtered) @@ -74,12 +129,16 @@ public class MassCalibrator { * @param params parsed search params (used for enzyme, de novo score threshold, etc.) * @param specKeyList the full list of SpecKeys for the file; the calibrator * samples every {@value #SAMPLING_STRIDE}th entry up to - * {@value #MAX_SAMPLED}. + * {@value #DEFAULT_MAX_SAMPLED} (override via + * system property {@code msgfplus.maxSampled}). * @param leftPrecursorMassTolerance main-pass left tolerance (reused for the pre-pass) * @param rightPrecursorMassTolerance main-pass right tolerance (reused for the pre-pass) - * @param minIsotopeError main-pass min isotope error - * @param maxIsotopeError main-pass max isotope error * @param specDataType scoring metadata (activation, instrument, enzyme, protocol) + * + * Note: the user's {@code -ti} isotope-error window is intentionally NOT + * propagated to the pre-pass. The pre-pass is fixed to isotope error 0 to + * prevent isotope-shift contamination of the residual distribution. + * See {@link #collectResiduals(int)}. */ public MassCalibrator( SpectraAccessor specAcc, @@ -89,8 +148,6 @@ public MassCalibrator( List specKeyList, Tolerance leftPrecursorMassTolerance, Tolerance rightPrecursorMassTolerance, - int minIsotopeError, - int maxIsotopeError, SpecDataType specDataType ) { this.specAcc = specAcc; @@ -100,14 +157,35 @@ public MassCalibrator( this.specKeyList = specKeyList; this.leftPrecursorMassTolerance = leftPrecursorMassTolerance; this.rightPrecursorMassTolerance = rightPrecursorMassTolerance; - this.minIsotopeError = minIsotopeError; - this.maxIsotopeError = maxIsotopeError; this.specDataType = specDataType; + this.maxSampled = readPositiveIntProperty(MAX_SAMPLED_PROPERTY, DEFAULT_MAX_SAMPLED); + this.minConfidentPsms = readPositiveIntProperty(MIN_CONFIDENT_PSMS_PROPERTY, DEFAULT_MIN_CONFIDENT_PSMS); + } + + /** Public accessor used by unit tests to exercise property parsing. */ + public static int readPositiveIntPropertyForTests(String name, int defaultValue) { + return readPositiveIntProperty(name, defaultValue); + } + + /** + * Reads a positive-integer system property; falls back to {@code defaultValue} + * for unset / non-numeric / non-positive values. + */ + private static int readPositiveIntProperty(String name, int defaultValue) { + String raw = System.getProperty(name); + if (raw == null || raw.isEmpty()) return defaultValue; + try { + int parsed = Integer.parseInt(raw.trim()); + return parsed > 0 ? parsed : defaultValue; + } catch (NumberFormatException e) { + return defaultValue; + } } /** * Runs the sampled pre-pass and returns the median ppm shift, or - * {@code 0.0} if fewer than {@value #MIN_CONFIDENT_PSMS} high-confidence + * {@code 0.0} if fewer than {@value #DEFAULT_MIN_CONFIDENT_PSMS} (override + * via {@code msgfplus.minConfidentPsms}) high-confidence * PSMs are collected. * *

The {@code ioIndex} argument is accepted for future multi-file hooks @@ -119,15 +197,27 @@ public MassCalibrator( * @return learned ppm shift, or 0.0 if the pre-pass had insufficient data */ public double learnPrecursorShiftPpm(int ioIndex) { - // Skip the pre-pass on small files where MIN_CONFIDENT_PSMS can't be reached. + return learnCalibrationStats(ioIndex).getShiftPpm(); + } + + /** + * Runs the sampled pre-pass and returns both the learned median shift and a + * robust spread estimate for later tolerance tightening. + */ + public CalibrationStats learnCalibrationStats(int ioIndex) { + // Skip the pre-pass on small files where minConfidentPsms can't be reached. if (specKeyList == null || specKeyList.size() < MIN_SPECKEYS_FOR_PREPASS) { - return 0.0; + return new CalibrationStats(0.0, 0.0, 0); } List residuals = collectResiduals(ioIndex); - if (residuals.size() < MIN_CONFIDENT_PSMS) { - return 0.0; + if (residuals.size() < minConfidentPsms) { + // count=0 is the "unreliable, do not apply" sentinel; CalibrationStats.hasReliableStats() + // checks for count > 0. + return new CalibrationStats(0.0, 0.0, 0); } - return median(residuals); + double shiftPpm = median(residuals); + double robustSigmaPpm = robustSigmaPpm(residuals, shiftPpm); + return new CalibrationStats(shiftPpm, robustSigmaPpm, residuals.size()); } /** @@ -140,11 +230,17 @@ List collectResiduals(int ioIndex) { return Collections.emptyList(); } - List sampled = sampleEveryNth(specKeyList, SAMPLING_STRIDE, MAX_SAMPLED); + List sampled = sampleEveryNth(specKeyList, SAMPLING_STRIDE, maxSampled); if (sampled.isEmpty()) { return Collections.emptyList(); } + // Force isotope error to 0 for the pre-pass: residuals are only meaningful + // when the matched peptide's monoisotopic mass equals the observed precursor's + // monoisotopic mass. With the user's wider -ti window (e.g. -1,2 on Astral), + // PSMs whose precursor is the M+1 or M+2 isotope inject ~500 / ~1000 ppm + // residuals into the pre-pass, contaminating median + MAD. Restricting the + // pre-pass to isotope error 0 keeps the residual distribution clean. // numPeptidesPerSpec = 1 keeps the pre-pass tiny and fast. precursorMassShiftPpm = 0.0 // because the whole point of the pre-pass is to LEARN the shift. ScoredSpectraMap prePassMap = new ScoredSpectraMap( @@ -152,12 +248,12 @@ List collectResiduals(int ioIndex) { sampled, leftPrecursorMassTolerance, rightPrecursorMassTolerance, - minIsotopeError, - maxIsotopeError, + 0, // pre-pass minIsotopeError (overrides user's -ti to keep residuals clean) + 0, // pre-pass maxIsotopeError specDataType, false, // storeRankScorer not needed for pre-pass false - ); + ).isolateSpectrumState(); prePassMap.makePepMassSpecKeyMap(); prePassMap.preProcessSpectra(); @@ -200,6 +296,13 @@ private List extractResiduals( return residuals; } + // Collect (residual, eValue) pairs so we can keep the cleanest subset + // by spec_eValue. Stratification on a 393-PSM Astral pre-pass showed + // sigma drops 4x (3.99 -> 0.99 ppm) when restricted to the top-200 + // most confident PSMs. Worst-half PSMs add residual scatter without + // adding signal — they get filtered out post-collection. + List residualWithEval = new ArrayList<>(); + for (Map.Entry> entry : specIndexDBMatchMap.entrySet()) { PriorityQueue queue = entry.getValue(); if (queue == null || queue.isEmpty()) { @@ -234,7 +337,23 @@ private List extractResiduals( if (theoreticalPeptideMass <= 0) { continue; } - residuals.add(residualPpm(observedPeptideMass, theoreticalPeptideMass)); + double residual = residualPpm(observedPeptideMass, theoreticalPeptideMass); + // Reject isotope-error contamination before robust-stats aggregation. + // See MAX_REASONABLE_RESIDUAL_PPM doc. + if (Math.abs(residual) > MAX_REASONABLE_RESIDUAL_PPM) { + continue; + } + residualWithEval.add(new double[]{residual, top.getSpecEValue()}); + } + + // Keep the top minConfidentPsms by spec_eValue (lowest eValue = + // most confident). On Astral this drops sigma from ~4 ppm to ~1 ppm + // because the worst-half PSMs (eValue near the 1e-6 threshold) are + // dominated by residual scatter, not real instrument bias. + residualWithEval.sort((a, b) -> Double.compare(a[1], b[1])); + int keepN = Math.min(residualWithEval.size(), minConfidentPsms); + for (int i = 0; i < keepN; i++) { + residuals.add(residualWithEval.get(i)[0]); } return residuals; } @@ -302,6 +421,39 @@ static double median(List values) { } } + /** + * Median absolute deviation around a known median. Empty list => 0.0. + */ + static double medianAbsoluteDeviation(List values, double center) { + if (values == null || values.isEmpty()) { + return 0.0; + } + List deviations = new ArrayList<>(values.size()); + for (double value : values) { + deviations.add(Math.abs(value - center)); + } + return median(deviations); + } + + /** + * Robust Gaussian-equivalent sigma estimate derived from MAD. + */ + static double robustSigmaPpm(List residuals, double center) { + return MAD_TO_SIGMA_SCALE * medianAbsoluteDeviation(residuals, center); + } + + /** + * Conservative tightened ppm half-window for a calibrated main pass. + */ + public static float tightenedTolerancePpm(float userPpm, double robustSigmaPpm, float sigmaMultiplier, + float floorPpm, float marginPpm) { + if (userPpm <= 0) { + return userPpm; + } + double tightened = Math.max(floorPpm, sigmaMultiplier * robustSigmaPpm + marginPpm); + return (float) Math.min(userPpm, tightened); + } + // ----- test-only public wrappers ------------------------------------- // // These exist solely so the unit tests can pin the helper semantics @@ -322,4 +474,21 @@ public static double residualPpmForTests(double observed, double theoretical) { public static List sampleEveryNthForTests(List source, int stride, int cap) { return sampleEveryNth(source, stride, cap); } + + /** Test-only access to {@link #medianAbsoluteDeviation(List, double)}. */ + public static double medianAbsoluteDeviationForTests(List values, double center) { + return medianAbsoluteDeviation(values, center); + } + + /** Test-only access to {@link #robustSigmaPpm(List, double)}. */ + public static double robustSigmaPpmForTests(List residuals, double center) { + return robustSigmaPpm(residuals, center); + } + + /** Test-only access to {@link #tightenedTolerancePpm(float, double, float, float, float)}. */ + public static float tightenedTolerancePpmForTests(float userPpm, double robustSigmaPpm, + float sigmaMultiplier, float floorPpm, + float marginPpm) { + return tightenedTolerancePpm(userPpm, robustSigmaPpm, sigmaMultiplier, floorPpm, marginPpm); + } } diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/ScoredSpectraMap.java b/src/main/java/edu/ucsd/msjava/msdbsearch/ScoredSpectraMap.java index 8dea0dfa..70597f1e 100644 --- a/src/main/java/edu/ucsd/msjava/msdbsearch/ScoredSpectraMap.java +++ b/src/main/java/edu/ucsd/msjava/msdbsearch/ScoredSpectraMap.java @@ -34,6 +34,7 @@ public class ScoredSpectraMap { private Map specKeyRankScorerMap; private boolean turnOffEdgeScoring = false; + private boolean isolateSpectrumState = false; private ProgressData progress; @@ -122,6 +123,17 @@ public ScoredSpectraMap turnOffEdgeScoring() { return this; } + /** + * Use cloned Spectrum snapshots while preprocessing so callers like the + * calibration pre-pass do not mutate the shared SpectraAccessor cache. + * The default remains false for the main search path to preserve current + * behavior and allocation profile. + */ + public ScoredSpectraMap isolateSpectrumState() { + this.isolateSpectrumState = true; + return this; + } + public SortedMap getPepMassSpecKeyMap() { return pepMassSpecKeyMap; } @@ -253,11 +265,11 @@ private void preProcessIndividualSpectra(int fromIndex, int toIndex) { scorer.doNotUseError(); } int charge = specKey.getCharge(); - spec.setCharge(charge); + Spectrum scoringSpec = prepareSpectrumForScoring(spec, charge); - NewScoredSpectrum scoredSpec = scorer.getScoredSpectrum(spec); + NewScoredSpectrum scoredSpec = scorer.getScoredSpectrum(scoringSpec); - float peptideMass = spec.getPrecursorMass() - (float) Composition.H2O; + float peptideMass = scoringSpec.getPrecursorMass() - (float) Composition.H2O; peptideMass = applyShift(peptideMass); float tolDaLeft = leftPrecursorMassTolerance.getToleranceAsDa(peptideMass); int maxNominalPeptideMass = NominalMass.toNominalMass(peptideMass) + Math.round(tolDaLeft - 0.4999f) - this.minIsotopeError; @@ -339,8 +351,8 @@ private void preProcessFusedSpectra(int fromIndex, int toIndex) { if (!scorer.supportEdgeScores()) supportEdgeScore = false; int charge = specKey.getCharge(); - spec.setCharge(charge); - NewScoredSpectrum sSpec = scorer.getScoredSpectrum(spec); + Spectrum scoringSpec = prepareSpectrumForScoring(spec, charge); + NewScoredSpectrum sSpec = scorer.getScoredSpectrum(scoringSpec); scoredSpecList.add(sSpec); } @@ -356,4 +368,22 @@ private void preProcessFusedSpectra(int fromIndex, int toIndex) { specKeyScorerMap.put(specKey, new FastScorer(scoredSpec, maxNominalPeptideMass)); } } + + Spectrum prepareSpectrumForScoring(Spectrum spec, int charge) { + if (isolateSpectrumState) { + Spectrum cloned = cloneSpectrum(spec); + cloned.setCharge(charge); + return cloned; + } + spec.setCharge(charge); + return spec; + } + + private static Spectrum cloneSpectrum(Spectrum spec) { + Spectrum cloned = spec.getCloneWithoutPeakList(); + for (Peak peak : spec) { + cloned.add(peak.clone()); + } + return cloned; + } } diff --git a/src/test/java/edu/ucsd/msjava/msdbsearch/TestScoredSpectraMapIsolation.java b/src/test/java/edu/ucsd/msjava/msdbsearch/TestScoredSpectraMapIsolation.java new file mode 100644 index 00000000..f36c62dc --- /dev/null +++ b/src/test/java/edu/ucsd/msjava/msdbsearch/TestScoredSpectraMapIsolation.java @@ -0,0 +1,52 @@ +package edu.ucsd.msjava.msdbsearch; + +import edu.ucsd.msjava.msgf.Tolerance; +import edu.ucsd.msjava.msutil.Spectrum; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Collections; + +public class TestScoredSpectraMapIsolation { + + @Test + public void defaultPathMutatesOriginalSpectrumCharge() { + ScoredSpectraMap map = new ScoredSpectraMap( + null, + Collections.emptyList(), + new Tolerance(10f, true), + new Tolerance(10f, true), + 0, + 0, + null, + false, + false); + Spectrum original = new Spectrum(500f, 2, 100f); + + Spectrum prepared = map.prepareSpectrumForScoring(original, 3); + + Assert.assertSame(original, prepared); + Assert.assertEquals(3, original.getCharge()); + } + + @Test + public void isolatedPathClonesSpectrumBeforeChangingCharge() { + ScoredSpectraMap map = new ScoredSpectraMap( + null, + Collections.emptyList(), + new Tolerance(10f, true), + new Tolerance(10f, true), + 0, + 0, + null, + false, + false).isolateSpectrumState(); + Spectrum original = new Spectrum(500f, 2, 100f); + + Spectrum prepared = map.prepareSpectrumForScoring(original, 3); + + Assert.assertNotSame(original, prepared); + Assert.assertEquals(2, original.getCharge()); + Assert.assertEquals(3, prepared.getCharge()); + } +} diff --git a/src/test/java/msgfplus/TestMassCalibrator.java b/src/test/java/msgfplus/TestMassCalibrator.java index 8e779226..509a8eef 100644 --- a/src/test/java/msgfplus/TestMassCalibrator.java +++ b/src/test/java/msgfplus/TestMassCalibrator.java @@ -69,6 +69,55 @@ public void medianSingleElement() { 1e-12); } + @Test + public void medianAbsoluteDeviationUsesProvidedCenter() { + List values = new ArrayList<>(Arrays.asList(1.0, 2.0, 4.0, 7.0)); + // Deviations from center=3 are [2,1,1,4] -> sorted [1,1,2,4] -> median 1.5 + Assert.assertEquals(1.5, + MassCalibrator.medianAbsoluteDeviationForTests(values, 3.0), + 1e-12); + } + + @Test + public void robustSigmaPpmScalesMad() { + List residuals = new ArrayList<>(Arrays.asList(9.0, 10.0, 11.0)); + // center=10, MAD=1 -> robust sigma = 1.4826 + Assert.assertEquals(1.4826, + MassCalibrator.robustSigmaPpmForTests(residuals, 10.0), + 1e-6); + } + + @Test + public void tightenedTolerancePpmRespectsUserUpperBound() { + float tightened = MassCalibrator.tightenedTolerancePpmForTests( + 10.0f, 0.2, 3.0f, 2.0f, 0.5f); + // k*sigma + margin = 1.1, floor dominates -> 2.0 ppm + Assert.assertEquals(2.0f, tightened, 1e-6f); + } + + @Test + public void tightenedTolerancePpmDoesNotExpandAlreadyTightWindow() { + float tightened = MassCalibrator.tightenedTolerancePpmForTests( + 1.5f, 0.2, 3.0f, 2.0f, 0.5f); + Assert.assertEquals(1.5f, tightened, 1e-6f); + } + + @Test + public void tightenedTolerancePpmTracksRobustSigmaWhenLargerThanFloor() { + float tightened = MassCalibrator.tightenedTolerancePpmForTests( + 12.0f, 1.0, 3.0f, 2.0f, 0.5f); + Assert.assertEquals(3.5f, tightened, 1e-6f); + } + + @Test + public void calibrationStatsCanBeReliableWithZeroShift() { + MassCalibrator.CalibrationStats stats = new MassCalibrator.CalibrationStats(0.0, 0.8, 250); + Assert.assertTrue(stats.hasReliableStats()); + Assert.assertEquals(0.0, stats.getShiftPpm(), 0.0); + Assert.assertEquals(0.8, stats.getRobustSigmaPpm(), 1e-12); + Assert.assertEquals(250, stats.getConfidentPsmCount()); + } + // ---- residualPpm() sign convention ---------------------------------- @Test @@ -142,4 +191,82 @@ public void sampleEveryNthSmallerThanStride() { Assert.assertEquals(1, sampled.size()); Assert.assertEquals(Integer.valueOf(0), sampled.get(0)); } + + // ---- system-property overrides for maxSampled / minConfidentPsms ---- + + @Test + public void propertyOverrideReturnsDefaultWhenUnset() { + // The property reader falls back to default for unset / empty / null. + String prop = "msgfplus.test.unsetProperty.unique." + System.nanoTime(); + try { + System.clearProperty(prop); + Assert.assertEquals(200, + MassCalibrator.readPositiveIntPropertyForTests(prop, 200)); + } finally { + System.clearProperty(prop); + } + } + + @Test + public void propertyOverrideParsesValidPositiveInt() { + String prop = "msgfplus.test.validInt." + System.nanoTime(); + try { + System.setProperty(prop, "1000"); + Assert.assertEquals(1000, + MassCalibrator.readPositiveIntPropertyForTests(prop, 200)); + } finally { + System.clearProperty(prop); + } + } + + @Test + public void propertyOverrideTrimsWhitespace() { + String prop = "msgfplus.test.trimWhitespace." + System.nanoTime(); + try { + System.setProperty(prop, " 500 "); + Assert.assertEquals(500, + MassCalibrator.readPositiveIntPropertyForTests(prop, 200)); + } finally { + System.clearProperty(prop); + } + } + + @Test + public void propertyOverrideFallsBackOnNonNumeric() { + // A typo or letter sequence must not crash the run; fall back to default. + String prop = "msgfplus.test.nonNumeric." + System.nanoTime(); + try { + System.setProperty(prop, "abc"); + Assert.assertEquals(200, + MassCalibrator.readPositiveIntPropertyForTests(prop, 200)); + } finally { + System.clearProperty(prop); + } + } + + @Test + public void propertyOverrideRejectsNonPositive() { + // 0 and negative values are nonsensical (sampling cap of 0 = skip; + // minConfidentPsms of 0 = trust any handful of PSMs); fall back to default. + String prop = "msgfplus.test.nonPositive." + System.nanoTime(); + try { + System.setProperty(prop, "0"); + Assert.assertEquals(200, + MassCalibrator.readPositiveIntPropertyForTests(prop, 200)); + System.setProperty(prop, "-50"); + Assert.assertEquals(200, + MassCalibrator.readPositiveIntPropertyForTests(prop, 200)); + } finally { + System.clearProperty(prop); + } + } + + @Test + public void publishedConstantsMatchHistoricalDefaults() { + // Pin the documented defaults so a future drift is loud. + Assert.assertEquals(500, MassCalibrator.DEFAULT_MAX_SAMPLED); + Assert.assertEquals(200, MassCalibrator.DEFAULT_MIN_CONFIDENT_PSMS); + Assert.assertEquals("msgfplus.maxSampled", MassCalibrator.MAX_SAMPLED_PROPERTY); + Assert.assertEquals("msgfplus.minConfidentPsms", MassCalibrator.MIN_CONFIDENT_PSMS_PROPERTY); + } }