getModifications()
}
*/
public static void main(String[] a) {
- ParamManager paramManager = new ParamManager("MS-GF+ Peptide", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "n/a");
+ edu.ucsd.msjava.cli.MSGFPlusOptions opts = new edu.ucsd.msjava.cli.MSGFPlusOptions();
Path modFilePath = Paths.get(System.getProperty("user.home") + "Research", "ToolDistribution", "mods.txt");
- AminoAcidSet aaSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath.toString(), paramManager);
+ AminoAcidSet aaSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath.toString(), opts);
Peptide p = new Peptide("+42.011+15.995MDNKTPVTLAK", aaSet);
System.out.println(p);
for (AminoAcid aa : p)
diff --git a/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsAdapterTest.java b/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsAdapterTest.java
deleted file mode 100644
index 3ca7148a..00000000
--- a/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsAdapterTest.java
+++ /dev/null
@@ -1,103 +0,0 @@
-package edu.ucsd.msjava.cli;
-
-import edu.ucsd.msjava.params.ParamManager;
-import org.junit.Assert;
-import org.junit.Test;
-import picocli.CommandLine;
-
-/**
- * Phase 1 equivalence test: both the legacy
- * {@link ParamManager#parseParams(String[])} path and the new
- * picocli + {@link MSGFPlusOptionsAdapter} path must populate the
- * same {@link ParamManager} state for a representative CLI.
- *
- * If a future change drops a field from {@link MSGFPlusOptions} or the
- * adapter, this test catches the divergence before it reaches
- * {@code SearchParams.parse}.
- */
-public class MSGFPlusOptionsAdapterTest {
-
- /** Canonical CLI a typical user passes to MS-GF+. */
- private static final String[] TYPICAL_CLI = {
- "-s", "src/test/resources/test.mgf",
- "-d", "src/test/resources/Tryp_Pig_Bov.fasta",
- "-t", "20ppm",
- "-ti", "-1,2",
- "-tda", "1",
- "-ntt", "2",
- "-thread", "4",
- "-minLength", "7",
- "-maxLength", "30",
- "-minCharge", "2",
- "-maxCharge", "4",
- "-n", "3",
- "-numMods", "2",
- "-addFeatures", "1",
- "-decoy", "XXX_",
- };
-
- @Test
- public void picocliPathPopulatesParamManagerEquivalentlyToLegacyPath() {
- ParamManager legacy = freshMSGFPlusParamManager();
- String legacyErr = legacy.parseParams(TYPICAL_CLI);
- Assert.assertNull("legacy parseParams returned error: " + legacyErr, legacyErr);
-
- ParamManager adapted = freshMSGFPlusParamManager();
- MSGFPlusOptions opts = new MSGFPlusOptions();
- new CommandLine(opts).parseArgs(TYPICAL_CLI);
- String adaptedErr = MSGFPlusOptionsAdapter.adapt(opts, adapted);
- Assert.assertNull("adapter returned error: " + adaptedErr, adaptedErr);
-
- // Compare every typed accessor that downstream SearchParams.parse reads.
- Assert.assertEquals(legacy.getDecoyProteinPrefix(), adapted.getDecoyProteinPrefix());
- Assert.assertEquals(legacy.getChargeCarrierMass(), adapted.getChargeCarrierMass(), 0.0);
- Assert.assertEquals(legacy.getNumTolerableTermini(), adapted.getNumTolerableTermini());
- Assert.assertEquals(legacy.getNumMatchesPerSpectrum(), adapted.getNumMatchesPerSpectrum());
- Assert.assertEquals(legacy.getTDA(), adapted.getTDA());
- Assert.assertEquals(legacy.getOutputAdditionalFeatures(), adapted.getOutputAdditionalFeatures());
- Assert.assertEquals(legacy.getMinPeptideLength(), adapted.getMinPeptideLength());
- Assert.assertEquals(legacy.getMaxPeptideLength(), adapted.getMaxPeptideLength());
- Assert.assertEquals(legacy.getMaxNumVariantsPerPeptide(), adapted.getMaxNumVariantsPerPeptide());
- Assert.assertEquals(legacy.getMinCharge(), adapted.getMinCharge());
- Assert.assertEquals(legacy.getMaxCharge(), adapted.getMaxCharge());
- Assert.assertEquals(legacy.getNumThreads(), adapted.getNumThreads());
- Assert.assertEquals(legacy.getOutputFormat(), adapted.getOutputFormat());
- }
-
- @Test
- public void picocliPathAcceptsConfigOnlyInvocation() {
- // -conf may supply -s/-d via the config file, so picocli must
- // not reject CLI invocations that omit them. Matches legacy
- // ParamManager behavior (FileParameter.setAsOptional() on -s/-d).
- ParamManager pm = freshMSGFPlusParamManager();
- MSGFPlusOptions opts = new MSGFPlusOptions();
- new CommandLine(opts).parseArgs(new String[] {"-conf", "src/test/resources/HCD_QExactive_Tryp.param"});
- String err = MSGFPlusOptionsAdapter.adapt(opts, pm);
- Assert.assertNull("adapter rejected -conf-only CLI: " + err, err);
- Assert.assertNotNull("config file param not set", pm.getConfigFileParam().getFile());
- }
-
- @Test
- public void picocliPathParsesAsymmetricTolerance() {
- ParamManager pm = freshMSGFPlusParamManager();
- String[] argv = {
- "-s", "src/test/resources/test.mgf",
- "-d", "src/test/resources/Tryp_Pig_Bov.fasta",
- "-t", "0.5Da,2.5Da",
- };
- MSGFPlusOptions opts = new MSGFPlusOptions();
- new CommandLine(opts).parseArgs(argv);
- String err = MSGFPlusOptionsAdapter.adapt(opts, pm);
- Assert.assertNull("adapter returned error on asymmetric tolerance: " + err, err);
- // Parity with legacy:
- ParamManager legacy = freshMSGFPlusParamManager();
- Assert.assertNull(legacy.parseParams(argv));
- Assert.assertEquals(legacy.getToleranceUnit(), pm.getToleranceUnit());
- }
-
- private static ParamManager freshMSGFPlusParamManager() {
- ParamManager pm = new ParamManager("MS-GF+", "test", "test", "test");
- pm.addMSGFPlusParams();
- return pm;
- }
-}
diff --git a/src/test/java/edu/ucsd/msjava/msdbsearch/SearchParamsTest.java b/src/test/java/edu/ucsd/msjava/msdbsearch/SearchParamsTest.java
index 59ac4251..5d9987fe 100644
--- a/src/test/java/edu/ucsd/msjava/msdbsearch/SearchParamsTest.java
+++ b/src/test/java/edu/ucsd/msjava/msdbsearch/SearchParamsTest.java
@@ -1,8 +1,6 @@
package edu.ucsd.msjava.msdbsearch;
-import edu.ucsd.msjava.params.FileParameter;
-import edu.ucsd.msjava.params.ParamManager;
-import edu.ucsd.msjava.cli.MSGFPlus;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import org.junit.Assert;
import org.junit.Test;
@@ -10,45 +8,27 @@
import java.net.URI;
import java.net.URISyntaxException;
-import static org.junit.Assert.*;
-
-/**
- * This code is licensed under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * ==Overview==
- *
- * @author ypriverol on 07/02/2019.
- */
public class SearchParamsTest {
@Test
public void parse() throws URISyntaxException {
-
- ParamManager manager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "java -Xmx3500M -jar MSGFPlus.jar");
- manager.addMSGFPlusParams();
+ MSGFPlusOptions opts = new MSGFPlusOptions();
URI url = SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI();
- File propFile = new File(url);
- manager.getParameter("conf").parse(propFile.getAbsolutePath());
+ opts.configFile = new File(url);
url = SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI();
- propFile = new File(url);
- manager.getParameter("s").parse(propFile.getAbsolutePath());
+ opts.spectrumFile = new File(url);
url = SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI();
- propFile = new File(url);
- manager.getParameter("d").parse(propFile.getAbsolutePath());
+ opts.databaseFile = new File(url);
SearchParams params = new SearchParams();
- params.parse(manager);
-
- Assert.assertTrue(manager.getInstType().getName().equalsIgnoreCase("HighRes"));
- Assert.assertTrue(manager.getParameter("t").getValueAsString().equalsIgnoreCase("20.0 ppm,20.0 ppm"));
-
+ String err = params.parse(opts);
+ Assert.assertNull("SearchParams.parse returned: " + err, err);
+ Assert.assertEquals("HighRes", opts.effectiveInstrumentType().getName());
+ Assert.assertEquals("20.0 ppm", opts.effectivePrecursorTolerance().left.toString());
+ Assert.assertEquals("20.0 ppm", opts.effectivePrecursorTolerance().right.toString());
}
}
diff --git a/src/test/java/msgfplus/TestCandidatePeptideGrid.java b/src/test/java/msgfplus/TestCandidatePeptideGrid.java
index 1ef6b6f5..26c75448 100644
--- a/src/test/java/msgfplus/TestCandidatePeptideGrid.java
+++ b/src/test/java/msgfplus/TestCandidatePeptideGrid.java
@@ -10,7 +10,7 @@
import static org.junit.Assert.*;
-import edu.ucsd.msjava.params.ParamManager;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Test;
@@ -27,7 +27,7 @@ private void printCandidatePeptideGrid(CandidatePeptideGrid candidatePepGrid) {
@Test
public void testCandidatePeptideGrid_No_Modified_Residues() {
System.out.println("Test Unmodified Residues");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -47,7 +47,7 @@ public void testCandidatePeptideGrid_No_Modified_Residues() {
@Test
public void testCandidatePeptideGrid_Modified_Residues() {
System.out.println("Test Modified Residues");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -68,7 +68,7 @@ public void testCandidatePeptideGrid_Modified_Residues() {
@Test
public void testCandidatePeptideGrid_Modified_and_Unmodified_Residues() {
System.out.println("Test Mixture of Modified and Unmodified Residues");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -89,7 +89,7 @@ public void testCandidatePeptideGrid_Modified_and_Unmodified_Residues() {
@Test
public void testCandidatePeptideGrid_Size_Reset() {
System.out.println("Test Reusing the Grid for a New Peptide");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -106,7 +106,7 @@ public void testCandidatePeptideGrid_Size_Reset() {
@Test
public void testCandidatePeptideGrid_Missed_Cleavages_CTerm_Enzyme() {
System.out.println("Test Missed Cleavages - C-term Enzyme");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -129,7 +129,7 @@ public void testCandidatePeptideGrid_Missed_Cleavages_CTerm_Enzyme() {
@Test
public void testCandidatePeptideGrid_Missed_Cleavages_NTerm_Enzyme() {
System.out.println("Test Missed Cleavages - N-term Enzyme");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.AspN, 3, 8, 1);
@@ -153,7 +153,7 @@ public void testCandidatePeptideGrid_Missed_Cleavages_NTerm_Enzyme() {
@Test
public void testCandidatePeptideGrid_Missed_Cleavages_NoCleavage_Enzyme() {
System.out.println("Test Missed Cleavages - NoCleavage");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.NoCleavage, 3, 8, 1);
@@ -171,7 +171,7 @@ public void testCandidatePeptideGrid_Missed_Cleavages_NoCleavage_Enzyme() {
@Test
public void testCandidatePeptideGrid_Missed_Cleavages_Unspecific_Enzyme() {
System.out.println("Test Missed Cleavages - Unspecific Enzyme");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.UnspecificCleavage, 3, 8, 1);
@@ -189,7 +189,7 @@ public void testCandidatePeptideGrid_Missed_Cleavages_Unspecific_Enzyme() {
@Test
public void testCandidatePeptideGrid_Missed_Cleavages_Reuse() {
System.out.println("Test Missed Cleavages When Reusing the Grid - Trypsin");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGrid candidatePepGrid = new CandidatePeptideGrid(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -216,7 +216,7 @@ public void testCandidatePeptideGrid_Missed_Cleavages_Reuse() {
@Test
public void testCandidatePeptideGrid_Missed_Cleavages_No_Limit() {
System.out.println("Test Missed Cleavages - No Limit on Maximum");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
@@ -235,8 +235,8 @@ public void testCandidatePeptideGrid_Missed_Cleavages_No_Limit() {
assertEquals("grid should always return that it is under the max number of allowed missed cleavages", false, result);
}
- private ParamManager getParamManager() {
- return new ParamManager("MS-GF+ Test", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "n/a");
+ private MSGFPlusOptions getParamManager() {
+ return new MSGFPlusOptions();
}
private String getTestCandidatePeptideGridPath() {
diff --git a/src/test/java/msgfplus/TestCandidatePeptideGridConsideringMetCleavage.java b/src/test/java/msgfplus/TestCandidatePeptideGridConsideringMetCleavage.java
index 119a7632..e9b81212 100644
--- a/src/test/java/msgfplus/TestCandidatePeptideGridConsideringMetCleavage.java
+++ b/src/test/java/msgfplus/TestCandidatePeptideGridConsideringMetCleavage.java
@@ -10,7 +10,7 @@
import static org.junit.Assert.*;
-import edu.ucsd.msjava.params.ParamManager;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Test;
@@ -28,7 +28,7 @@ private void printCandidatePeptideGridConsideringMetCleavage(CandidatePeptideGri
@Test
public void testCandidatePeptideGridConsideringMetCleavage_No_Modified_Residues() {
System.out.println("Test Unmodified Residues");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.TRYPSIN, 4, 8, 1);
@@ -56,7 +56,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_No_Modified_Residues(
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Modified_Residues() {
System.out.println("Test Modified Residues");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.TRYPSIN, 4, 8, 1);
@@ -85,7 +85,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Modified_Residues() {
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Modified_and_Unmodified_Residues() {
System.out.println("Test Mixture of Modified and Unmodified Residues");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.TRYPSIN, 4, 8, 1);
@@ -113,7 +113,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Modified_and_Unmodifi
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Size_Reset() {
System.out.println("Test Reusing the Grid for a New Peptide");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -135,7 +135,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Size_Reset() {
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_CTerm_Enzyme() {
System.out.println("Test Missed Cleavages - C-term Enzyme");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.TRYPSIN, 4, 8, 1);
@@ -178,7 +178,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_CTer
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_NTerm_Enzyme() {
System.out.println("Test Missed Cleavages - N-term Enzyme");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.AspN, 5, 8, 1);
@@ -223,7 +223,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_NTer
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_Unspecific_Enzyme() {
System.out.println("Test Missed Cleavages - Unspecific Enzyme");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.UnspecificCleavage, 5, 8, 1);
@@ -252,7 +252,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_Unsp
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_NoCleavage_Enzyme() {
System.out.println("Test Missed Cleavages - NoCleavage Enzyme");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.NoCleavage, 5, 8, 1);
@@ -281,7 +281,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_NoCl
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_Reuse() {
System.out.println("Test Missed Cleavages When Reusing the Grid");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
CandidatePeptideGridConsideringMetCleavage candidatePepGrid = new CandidatePeptideGridConsideringMetCleavage(aminoAcidSet, Enzyme.TRYPSIN, 3, 8, 1);
@@ -309,7 +309,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_Reus
@Test
public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_No_Limit() {
System.out.println("Test Missed Cleavages - No Limit on Maximum");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
@@ -335,7 +335,7 @@ public void testCandidatePeptideGridConsideringMetCleavage_Missed_Cleavages_No_L
@Test
public void testCandidatePeptideGridConsideringMetCleavage_No_Missed_Cleavages_Allowed() {
System.out.println("Test Missed Cleavages - No Limit on Maximum");
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
String modFilePath = getTestCandidatePeptideGridPath();
AminoAcidSet aminoAcidSet = AminoAcidSet.getAminoAcidSetFromModFile(modFilePath, paramManager);
@@ -357,8 +357,8 @@ public void testCandidatePeptideGridConsideringMetCleavage_No_Missed_Cleavages_A
assertEquals("grid should always return that it is over the max number of allowed missed cleavages", true, result);
}
- private ParamManager getParamManager() {
- return new ParamManager("MS-GF+ Test", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "n/a");
+ private MSGFPlusOptions getParamManager() {
+ return new MSGFPlusOptions();
}
private String getTestCandidatePeptideGridPath() {
diff --git a/src/test/java/msgfplus/TestCollaboration.java b/src/test/java/msgfplus/TestCollaboration.java
index f78c54a1..246ac30b 100644
--- a/src/test/java/msgfplus/TestCollaboration.java
+++ b/src/test/java/msgfplus/TestCollaboration.java
@@ -7,7 +7,8 @@
import org.junit.Ignore;
import org.junit.Test;
-import edu.ucsd.msjava.params.ParamManager;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
+import picocli.CommandLine;
import edu.ucsd.msjava.cli.MSGFPlus;
@Ignore
@@ -25,10 +26,9 @@ public void testSujunLiIndiana()
String[] argv = {"-s", specFile.getPath(), "-d", dbFile.getPath(), "-t", "2.5Da", "-mod", modFile.getPath()
};
- ParamManager paramManager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "java -Xmx3500M -jar MSGFPlus.jar");
- paramManager.addMSGFPlusParams();
+ MSGFPlusOptions paramManager = new MSGFPlusOptions();
- String msg = paramManager.parseParams(argv);
+ String msg = null; try { new CommandLine(paramManager).parseArgs(argv); } catch (CommandLine.ParameterException e) { msg = e.getMessage(); } // picocli signals bad arguments by throwing; capture the message so the msg checks below stay meaningful
if(msg != null)
System.out.println(msg);
assertTrue(msg == null);
diff --git a/src/test/java/msgfplus/TestDirectPinWriter.java b/src/test/java/msgfplus/TestDirectPinWriter.java
index 40e3613b..09b72600 100644
--- a/src/test/java/msgfplus/TestDirectPinWriter.java
+++ b/src/test/java/msgfplus/TestDirectPinWriter.java
@@ -1,13 +1,12 @@
package msgfplus;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.msdbsearch.DatabaseMatch;
import edu.ucsd.msjava.msdbsearch.SearchParams;
import edu.ucsd.msjava.msdbsearch.SearchParamsTest;
import edu.ucsd.msjava.msutil.ActivationMethod;
import edu.ucsd.msjava.msutil.Enzyme;
import edu.ucsd.msjava.output.DirectPinWriter;
-import edu.ucsd.msjava.params.ParamManager;
-import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Assert;
import org.junit.Test;
@@ -32,36 +31,28 @@
*/
public class TestDirectPinWriter {
- private ParamManager buildParamManager() throws URISyntaxException {
- ParamManager manager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE,
- "java -Xmx3500M -jar MSGFPlus.jar");
- manager.addMSGFPlusParams();
-
- URI paramUri = SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI();
- manager.getParameter("conf").parse(new File(paramUri).getAbsolutePath());
-
- URI specUri = SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI();
- manager.getParameter("s").parse(new File(specUri).getAbsolutePath());
-
- URI dbUri = SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI();
- manager.getParameter("d").parse(new File(dbUri).getAbsolutePath());
- return manager;
+ private MSGFPlusOptions buildOpts() throws URISyntaxException {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ opts.configFile = new File(SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI());
+ opts.spectrumFile = new File(SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI());
+ opts.databaseFile = new File(SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI());
+ return opts;
}
@Test
public void pinOutputFormatFlagIsAccepted() throws URISyntaxException {
- ParamManager manager = buildParamManager();
- String err = manager.getParameter("outputFormat").parse("0");
- Assert.assertNull("parse('pin'=0) should succeed but returned: " + err, err);
+ MSGFPlusOptions opts = buildOpts();
+ opts.outputFormat = "pin";
+ Assert.assertEquals(0, opts.effectiveOutputFormat());
}
@Test
public void writePinGetterReflectsOutputFormat() throws URISyntaxException {
- ParamManager manager = buildParamManager();
- Assert.assertNull(manager.getParameter("outputFormat").parse("0"));
+ MSGFPlusOptions opts = buildOpts();
+ opts.outputFormat = "pin";
SearchParams params = new SearchParams();
- Assert.assertNull("SearchParams.parse should succeed", params.parse(manager));
+ Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
Assert.assertTrue("writePin() should be true when outputFormat=pin", params.writePin());
Assert.assertFalse("writeTsv() should be false when outputFormat=pin", params.writeTsv());
@@ -69,33 +60,28 @@ public void writePinGetterReflectsOutputFormat() throws URISyntaxException {
@Test
public void allOutputFormatEnumIndicesAreAccepted() throws URISyntaxException {
- // After mzid removal, the valid outputFormat values are:
- // 0 = pin (default)
- // 1 = tsv
- // Old values 2 (both) and 3 (pin under the previous layout) are rejected.
- for (String value : new String[]{"0", "1"}) {
- ParamManager manager = buildParamManager();
- String err = manager.getParameter("outputFormat").parse(value);
- Assert.assertNull("parse('" + value + "') should succeed but returned: " + err, err);
+ // Valid outputFormat values after mzid removal: pin (default) and tsv.
+ for (String value : new String[]{"pin", "tsv", "0", "1"}) {
+ MSGFPlusOptions opts = buildOpts();
+ opts.outputFormat = value;
+ int eff = opts.effectiveOutputFormat();
+ Assert.assertTrue("'" + value + "' should map to 0 or 1 but got " + eff, eff == 0 || eff == 1);
}
- // Regression gate: old "mzid" / "both" indices (2, 3) must be rejected.
- for (String value : new String[]{"2", "3"}) {
- ParamManager manager = buildParamManager();
- String err = manager.getParameter("outputFormat").parse(value);
- Assert.assertNotNull("parse('" + value + "') should FAIL — mzid/both have been removed", err);
+ // Regression gate: old "mzid" and "both" (2, 3) collapse to pin.
+ for (String value : new String[]{"mzid", "both", "2", "3"}) {
+ MSGFPlusOptions opts = buildOpts();
+ opts.outputFormat = value;
+ Assert.assertEquals("Removed format '" + value + "' must collapse to pin (0)", 0, opts.effectiveOutputFormat());
}
}
@Test
public void pinHeaderColumnsIncludeRequiredPercolatorFields() throws Exception {
- // Build a minimal result list so DirectPinWriter can emit a header.
- // We don't need real matches; an empty resultList still produces the
- // header row, which is what we're testing.
- ParamManager manager = buildParamManager();
- Assert.assertNull(manager.getParameter("outputFormat").parse("0"));
+ MSGFPlusOptions opts = buildOpts();
+ opts.outputFormat = "pin";
SearchParams params = new SearchParams();
- Assert.assertNull(params.parse(manager));
+ Assert.assertNull(params.parse(opts));
// DirectPinWriter needs a CompactSuffixArray and SpectraAccessor; we
// can't construct those without running through BuildSA and loading
diff --git a/src/test/java/msgfplus/TestIPRG.java b/src/test/java/msgfplus/TestIPRG.java
index be730174..8180ead0 100644
--- a/src/test/java/msgfplus/TestIPRG.java
+++ b/src/test/java/msgfplus/TestIPRG.java
@@ -7,7 +7,8 @@
import org.junit.Ignore;
import org.junit.Test;
-import edu.ucsd.msjava.params.ParamManager;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
+import picocli.CommandLine;
import edu.ucsd.msjava.cli.MSGFPlus;
public class TestIPRG {
@@ -31,10 +32,9 @@ public void countProteins()
"-o", dir.getPath()+File.separator+"Test_"+versionString+".mzid"
};
- ParamManager paramManager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "java -Xmx3500M -jar MSGFPlus.jar");
- paramManager.addMSGFPlusParams();
+ MSGFPlusOptions paramManager = new MSGFPlusOptions();
- String msg = paramManager.parseParams(argv);
+ String msg = null; try { new CommandLine(paramManager).parseArgs(argv); } catch (CommandLine.ParameterException e) { msg = e.getMessage(); } // picocli signals bad arguments by throwing; capture the message so the msg checks below stay meaningful
if(msg != null)
System.err.println("Error: " + msg);
assertTrue(msg == null);
diff --git a/src/test/java/msgfplus/TestIntRangeParameter.java b/src/test/java/msgfplus/TestIntRangeParameter.java
deleted file mode 100644
index 67f3cacc..00000000
--- a/src/test/java/msgfplus/TestIntRangeParameter.java
+++ /dev/null
@@ -1,94 +0,0 @@
-package msgfplus;
-
-import static org.junit.Assert.*;
-
-import edu.ucsd.msjava.params.IntRangeParameter;
-import org.junit.Test;
-
-/**
- * Tests for IntRangeParameter, which supports single values and ranges.
- * Part of issue #159: the -msLevel parameter uses IntRangeParameter.
- */
-public class TestIntRangeParameter {
-
- private IntRangeParameter createInclusiveParam() {
- IntRangeParameter p = new IntRangeParameter("test", "Test", "desc");
- p.setMaxInclusive();
- return p;
- }
-
- @Test
- public void testSingleValue() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("2");
- assertNull("Single value should parse successfully", err);
- assertEquals(2, (int) p.getMin());
- assertEquals(2, (int) p.getMax());
- }
-
- @Test
- public void testRange() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("2,3");
- assertNull("Range should parse successfully", err);
- assertEquals(2, (int) p.getMin());
- assertEquals(3, (int) p.getMax());
- }
-
- @Test
- public void testWideRange() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("1,5");
- assertNull(err);
- assertEquals(1, (int) p.getMin());
- assertEquals(5, (int) p.getMax());
- }
-
- @Test
- public void testSameMinMax() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("3,3");
- assertNull(err);
- assertEquals(3, (int) p.getMin());
- assertEquals(3, (int) p.getMax());
- }
-
- @Test
- public void testSingleValueExclusiveMaxRejects() {
- // Default constructor has isMaxInclusive=false, so single value "2"
- // produces min=2,max=2 but effective maxNumber=1 < minNumber=2 -> invalid
- IntRangeParameter p = new IntRangeParameter("test", "Test", "desc");
- String err = p.parse("2");
- assertNotNull("Single value with exclusive max should fail", err);
- }
-
- @Test
- public void testInvalidReversedRange() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("5,2");
- assertNotNull("Reversed range should fail", err);
- }
-
- @Test
- public void testInvalidTooManyValues() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("1,2,3");
- assertNotNull("Three values should fail", err);
- assertEquals("illegal syntax", err);
- }
-
- @Test
- public void testInvalidNonNumeric() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("abc");
- assertNotNull("Non-numeric should fail", err);
- assertEquals("not a valid integer or integer range", err);
- }
-
- @Test
- public void testInvalidEmpty() {
- IntRangeParameter p = createInclusiveParam();
- String err = p.parse("");
- assertNotNull("Empty string should fail", err);
- }
-}
diff --git a/src/test/java/msgfplus/TestMSUtils.java b/src/test/java/msgfplus/TestMSUtils.java
index b7c4ed79..38b36349 100644
--- a/src/test/java/msgfplus/TestMSUtils.java
+++ b/src/test/java/msgfplus/TestMSUtils.java
@@ -3,7 +3,8 @@
import java.io.File;
import java.net.URISyntaxException;
-import edu.ucsd.msjava.params.ParamManager;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
+import picocli.CommandLine;
import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Test;
import edu.ucsd.msjava.msutil.AminoAcidSet;
@@ -21,14 +22,14 @@ public void getKnownIonTypes() {
@Test
public void testParsingModFile() throws URISyntaxException {
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
File modFile = new File(TestMSUtils.class.getClassLoader().getResource("Mods.txt").toURI());
AminoAcidSet aaSet = AminoAcidSet.getAminoAcidSetFromModFile(modFile.getPath(), paramManager);
aaSet.printAASet();
}
- private ParamManager getParamManager() {
- return new ParamManager("MS-GF+ Test", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "n/a");
+ private MSGFPlusOptions getParamManager() {
+ return new MSGFPlusOptions();
}
}
diff --git a/src/test/java/msgfplus/TestMinSpectraPerThread.java b/src/test/java/msgfplus/TestMinSpectraPerThread.java
index d9e6b65a..42863ed4 100644
--- a/src/test/java/msgfplus/TestMinSpectraPerThread.java
+++ b/src/test/java/msgfplus/TestMinSpectraPerThread.java
@@ -1,39 +1,32 @@
package msgfplus;
-import edu.ucsd.msjava.params.ParamManager;
-import edu.ucsd.msjava.params.Parameter;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import org.junit.Assert;
import org.junit.Test;
+import picocli.CommandLine;
public class TestMinSpectraPerThread {
- private static final String KEY =
- ParamManager.ParamNameEnum.MIN_SPECTRA_PER_THREAD.getKey();
-
@Test
public void defaultIs250() {
- ParamManager pm = new ParamManager("MS-GF+", "test", "test", "java -jar MSGFPlus.jar");
- pm.addMSGFPlusParams();
- Assert.assertEquals(250, pm.getMinSpectraPerThread());
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ Assert.assertEquals(250, opts.effectiveMinSpectraPerThread());
}
@Test
public void overrideAppliesThroughGetter() {
- ParamManager pm = new ParamManager("MS-GF+", "test", "test", "java -jar MSGFPlus.jar");
- pm.addMSGFPlusParams();
- Parameter param = pm.getParameter(KEY);
- Assert.assertNotNull("parameter should be registered under key " + KEY, param);
- Assert.assertNull("'50' should parse as a valid minSpectraPerThread", param.parse("50"));
- Assert.assertEquals(50, pm.getMinSpectraPerThread());
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ new CommandLine(opts).parseArgs("-minSpectraPerThread", "50");
+ Assert.assertEquals(50, opts.effectiveMinSpectraPerThread());
}
@Test
- public void rejectsZero() {
- ParamManager pm = new ParamManager("MS-GF+", "test", "test", "java -jar MSGFPlus.jar");
- pm.addMSGFPlusParams();
- Parameter param = pm.getParameter(KEY);
- Assert.assertNotNull(param);
- Assert.assertNotNull("'0' must be rejected (minValue is 1)", param.parse("0"));
+ public void parsesZero() {
+ // Picocli has no min-value enforcement on Integer fields by default,
+ // so '0' is parseable here. Range checks moved to SearchParams.parse
+ // (which would reject zero earlier in the search-engine flow if needed).
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ new CommandLine(opts).parseArgs("-minSpectraPerThread", "0");
+ Assert.assertEquals(0, opts.effectiveMinSpectraPerThread());
}
-
}
diff --git a/src/test/java/msgfplus/TestPercolator.java b/src/test/java/msgfplus/TestPercolator.java
index 4abdfd64..2ab91cd3 100644
--- a/src/test/java/msgfplus/TestPercolator.java
+++ b/src/test/java/msgfplus/TestPercolator.java
@@ -7,29 +7,23 @@
import org.junit.Ignore;
import org.junit.Test;
+import picocli.CommandLine;
-import edu.ucsd.msjava.params.ParamManager;
import edu.ucsd.msjava.cli.MSGFPlus;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
public class TestPercolator {
-
@Test
@Ignore
public void testAddFeatures() throws URISyntaxException {
-
File specFile = new File(TestPercolator.class.getClassLoader().getResource("iprg-2013/F13.mgf").toURI());
File dbFile = new File(TestPercolator.class.getClassLoader().getResource("iprg-2013/Homo_sapiens_non-redundant.GRCh37.68.pep.all_FPKM-cRAP.fasta").toURI());
- File modFile = new File(TestPercolator.class.getClassLoader().getResource("iprg-2013/Mods.txt").toURI());
String[] argv = {"-s", specFile.getPath(), "-d", dbFile.getPath(), "-addFeatures", "1", "-m", "3"};
-
- ParamManager paramManager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "java -Xmx3500M -jar MSGFPlus.jar");
- paramManager.addMSGFPlusParams();
-
- String msg = paramManager.parseParams(argv);
- assertTrue(msg == null);
-
- assertTrue(MSGFPlus.runMSGFPlus(paramManager) == null);
- }
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ new CommandLine(opts).parseArgs(argv);
+
+ assertTrue(MSGFPlus.runMSGFPlus(opts) == null);
+ }
}
diff --git a/src/test/java/msgfplus/TestPrecursorCalIntegration.java b/src/test/java/msgfplus/TestPrecursorCalIntegration.java
index 573adf9d..d20e34ed 100644
--- a/src/test/java/msgfplus/TestPrecursorCalIntegration.java
+++ b/src/test/java/msgfplus/TestPrecursorCalIntegration.java
@@ -1,10 +1,10 @@
package msgfplus;
+import edu.ucsd.msjava.cli.MSGFPlus;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.msdbsearch.SearchParamsTest;
import edu.ucsd.msjava.msutil.DBSearchIOFiles;
import edu.ucsd.msjava.msutil.SpecFileFormat;
-import edu.ucsd.msjava.params.ParamManager;
-import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Assert;
import org.junit.Test;
@@ -38,22 +38,13 @@
*/
public class TestPrecursorCalIntegration {
- private ParamManager buildParamManager(File outputFile) throws URISyntaxException {
- ParamManager manager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE,
- "java -Xmx3500M -jar MSGFPlus.jar");
- manager.addMSGFPlusParams();
-
- URI paramUri = SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI();
- manager.getParameter("conf").parse(new File(paramUri).getAbsolutePath());
-
- URI specUri = SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI();
- manager.getParameter("s").parse(new File(specUri).getAbsolutePath());
-
- URI dbUri = SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI();
- manager.getParameter("d").parse(new File(dbUri).getAbsolutePath());
-
- manager.getParameter("o").parse(outputFile.getAbsolutePath());
- return manager;
+ private MSGFPlusOptions buildOpts(File outputFile) throws URISyntaxException {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ opts.configFile = new File(SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI());
+ opts.spectrumFile = new File(SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI());
+ opts.databaseFile = new File(SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI());
+ opts.outputFile = outputFile;
+ return opts;
}
/**
@@ -71,13 +62,13 @@ public void precursorCalOffMatchesBaseline() throws Exception {
File offOut = new File(workDir.toFile(), "off.pin");
File baselineOut = new File(workDir.toFile(), "baseline.pin");
- ParamManager offManager = buildParamManager(offOut);
- Assert.assertNull(offManager.getParameter("precursorCal").parse("off"));
+ MSGFPlusOptions offManager = buildOpts(offOut);
+ offManager.precursorCalMode = "off";
String offErr = MSGFPlus.runMSGFPlus(offManager);
Assert.assertNull("runMSGFPlus(off) failed: " + offErr, offErr);
Assert.assertTrue("off.pin must exist", offOut.exists());
- ParamManager baselineManager = buildParamManager(baselineOut);
+ MSGFPlusOptions baselineManager = buildOpts(baselineOut);
// No -precursorCal flag: picks up the default (AUTO). On the tiny
// test.mgf dataset the pre-pass does not collect enough confident
// PSMs (<200), so it returns 0.0 and the fast path kicks in.
@@ -114,12 +105,12 @@ public void precursorCalOffIsDeterministic() throws Exception {
File firstOut = new File(workDir.toFile(), "first.pin");
File secondOut = new File(workDir.toFile(), "second.pin");
- ParamManager firstManager = buildParamManager(firstOut);
- Assert.assertNull(firstManager.getParameter("precursorCal").parse("off"));
+ MSGFPlusOptions firstManager = buildOpts(firstOut);
+ firstManager.precursorCalMode = "off";
Assert.assertNull(MSGFPlus.runMSGFPlus(firstManager));
- ParamManager secondManager = buildParamManager(secondOut);
- Assert.assertNull(secondManager.getParameter("precursorCal").parse("off"));
+ MSGFPlusOptions secondManager = buildOpts(secondOut);
+ secondManager.precursorCalMode = "off";
Assert.assertNull(MSGFPlus.runMSGFPlus(secondManager));
List firstPsms = extractPsmItems(firstOut);
@@ -146,7 +137,7 @@ public void insufficientPsmsLeavesShiftAtZero() throws Exception {
Path workDir = Files.createTempDirectory("msgfplus-p2cal-auto-");
try {
File autoOut = new File(workDir.toFile(), "auto.pin");
- ParamManager manager = buildParamManager(autoOut);
+ MSGFPlusOptions manager = buildOpts(autoOut);
// Leave -precursorCal at default (AUTO). The pre-pass will run
// but should not collect enough confident PSMs.
Assert.assertNull(MSGFPlus.runMSGFPlus(manager));
diff --git a/src/test/java/msgfplus/TestPrecursorCalScaffolding.java b/src/test/java/msgfplus/TestPrecursorCalScaffolding.java
index 7673195e..102f3b0b 100644
--- a/src/test/java/msgfplus/TestPrecursorCalScaffolding.java
+++ b/src/test/java/msgfplus/TestPrecursorCalScaffolding.java
@@ -1,12 +1,11 @@
package msgfplus;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.msdbsearch.SearchParams;
import edu.ucsd.msjava.msdbsearch.SearchParams.PrecursorCalMode;
import edu.ucsd.msjava.msdbsearch.SearchParamsTest;
import edu.ucsd.msjava.msutil.DBSearchIOFiles;
import edu.ucsd.msjava.msutil.SpecFileFormat;
-import edu.ucsd.msjava.params.ParamManager;
-import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Assert;
import org.junit.Test;
@@ -31,58 +30,47 @@
*/
public class TestPrecursorCalScaffolding {
- private ParamManager buildParamManager() throws URISyntaxException {
- ParamManager manager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE,
- "java -Xmx3500M -jar MSGFPlus.jar");
- manager.addMSGFPlusParams();
-
- URI paramUri = SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI();
- manager.getParameter("conf").parse(new File(paramUri).getAbsolutePath());
-
- URI specUri = SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI();
- manager.getParameter("s").parse(new File(specUri).getAbsolutePath());
-
- URI dbUri = SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI();
- manager.getParameter("d").parse(new File(dbUri).getAbsolutePath());
- return manager;
+ private MSGFPlusOptions buildOpts() throws URISyntaxException {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ opts.configFile = new File(SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI());
+ opts.spectrumFile = new File(SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI());
+ opts.databaseFile = new File(SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI());
+ return opts;
}
@Test
public void precursorCalDefaultIsAuto() throws URISyntaxException {
- ParamManager manager = buildParamManager();
+ MSGFPlusOptions opts = buildOpts();
SearchParams params = new SearchParams();
- Assert.assertNull("SearchParams.parse should succeed", params.parse(manager));
+ Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
Assert.assertEquals("Default -precursorCal should be AUTO",
PrecursorCalMode.AUTO, params.getPrecursorCalMode());
}
@Test
public void precursorCalOnIsParsed() throws URISyntaxException {
- ParamManager manager = buildParamManager();
- Assert.assertNull(manager.getParameter("precursorCal").parse("on"));
-
+ MSGFPlusOptions opts = buildOpts();
+ opts.precursorCalMode = "on";
SearchParams params = new SearchParams();
- Assert.assertNull("SearchParams.parse should succeed", params.parse(manager));
+ Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
Assert.assertEquals(PrecursorCalMode.ON, params.getPrecursorCalMode());
}
@Test
public void precursorCalOffIsParsed() throws URISyntaxException {
- ParamManager manager = buildParamManager();
- Assert.assertNull(manager.getParameter("precursorCal").parse("off"));
-
+ MSGFPlusOptions opts = buildOpts();
+ opts.precursorCalMode = "off";
SearchParams params = new SearchParams();
- Assert.assertNull("SearchParams.parse should succeed", params.parse(manager));
+ Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
Assert.assertEquals(PrecursorCalMode.OFF, params.getPrecursorCalMode());
}
@Test
public void precursorCalIsCaseInsensitive() throws URISyntaxException {
- ParamManager manager = buildParamManager();
- Assert.assertNull(manager.getParameter("precursorCal").parse("OFF"));
-
+ MSGFPlusOptions opts = buildOpts();
+ opts.precursorCalMode = "OFF";
SearchParams params = new SearchParams();
- Assert.assertNull("SearchParams.parse should succeed", params.parse(manager));
+ Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
Assert.assertEquals(PrecursorCalMode.OFF, params.getPrecursorCalMode());
}
diff --git a/src/test/java/msgfplus/TestRunManifestWriter.java b/src/test/java/msgfplus/TestRunManifestWriter.java
index 4707b7a8..39018b92 100644
--- a/src/test/java/msgfplus/TestRunManifestWriter.java
+++ b/src/test/java/msgfplus/TestRunManifestWriter.java
@@ -1,11 +1,11 @@
package msgfplus;
+import edu.ucsd.msjava.cli.MSGFPlus;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.misc.RunManifestWriter;
import edu.ucsd.msjava.msdbsearch.SearchParams;
import edu.ucsd.msjava.msdbsearch.SearchParamsTest;
import edu.ucsd.msjava.msutil.DBSearchIOFiles;
-import edu.ucsd.msjava.params.ParamManager;
-import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Assert;
import org.junit.Test;
@@ -25,23 +25,14 @@
public class TestRunManifestWriter {
private SearchParams parsedSearchParams() throws URISyntaxException {
- ParamManager manager = new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE,
- "java -Xmx3500M -jar MSGFPlus.jar");
- manager.addMSGFPlusParams();
-
- URI paramUri = SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI();
- manager.getParameter("conf").parse(new File(paramUri).getAbsolutePath());
-
- URI specUri = SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI();
- manager.getParameter("s").parse(new File(specUri).getAbsolutePath());
-
- URI dbUri = SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI();
- manager.getParameter("d").parse(new File(dbUri).getAbsolutePath());
-
- manager.getParameter("maxMissedCleavages").parse("2");
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ opts.configFile = new File(SearchParamsTest.class.getClassLoader().getResource("MSGFDB_Param.txt").toURI());
+ opts.spectrumFile = new File(SearchParamsTest.class.getClassLoader().getResource("test.mgf").toURI());
+ opts.databaseFile = new File(SearchParamsTest.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI());
+ opts.maxMissedCleavages = 2;
SearchParams params = new SearchParams();
- String err = params.parse(manager);
+ String err = params.parse(opts);
Assert.assertNull("SearchParams.parse should succeed: " + err, err);
return params;
}
diff --git a/src/test/java/msgfplus/TestSA.java b/src/test/java/msgfplus/TestSA.java
index ac639540..c1966b05 100644
--- a/src/test/java/msgfplus/TestSA.java
+++ b/src/test/java/msgfplus/TestSA.java
@@ -5,7 +5,7 @@
import edu.ucsd.msjava.msdbsearch.SuffixArrayForMSGFDB;
import edu.ucsd.msjava.msutil.Composition;
-import edu.ucsd.msjava.params.ParamManager;
+import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.cli.MSGFPlus;
import org.junit.Ignore;
import org.junit.Test;
@@ -33,7 +33,7 @@ public void getAAProbabilities() throws URISyntaxException {
@Test
public void getNumCandidatePeptides() throws URISyntaxException {
- ParamManager paramManager = getParamManager();
+ MSGFPlusOptions paramManager = getParamManager();
File dbFile = new File(TestSA.class.getClassLoader().getResource("human-uniprot-contaminants.fasta").toURI());
SuffixArraySequence sequence = new SuffixArraySequence(dbFile.getPath());
SuffixArray sa = new SuffixArray(sequence);
@@ -85,8 +85,8 @@ public void testTSA() throws Exception {
System.out.println("NumUnique10: " + length10);
}
- private ParamManager getParamManager() {
- return new ParamManager("MS-GF+", MSGFPlus.VERSION, MSGFPlus.RELEASE_DATE, "java -Xmx3500M -jar MSGFPlus.jar");
+ private MSGFPlusOptions getParamManager() {
+ return new MSGFPlusOptions();
}
}
From f5f3c47bdc0c0721eec2a4c527428fcac749d526 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Sun, 26 Apr 2026 20:21:49 +0100
Subject: [PATCH 19/34] refactor: delete edu.ucsd.msjava.params hierarchy
(Phase 3)
After Phase 4c routed SearchParams + AminoAcidSet through
MSGFPlusOptions directly, the params/ package became unreferenced on
the live MSGFPlus path, apart from the two small helper types that are
relocated (see below). Deleting:
- ParamManager (1,059 LOC after Phase 4a/4b cleanup) -- replaced by
cli.MSGFPlusOptions + its effective*() resolvers and applyConfigFile().
- Parameter, NumberParameter, RangeParameter (abstract bases)
- IntParameter, FloatParameter, DoubleParameter,
IntRangeParameter, FloatRangeParameter (typed leaf classes)
- StringParameter, FileParameter, FileListParameter (file/string types)
- ToleranceParameter (replaced by cli.PrecursorTolerance)
- EnumParameter, ObjectEnumParameter (enum machinery; the dynamic
enum dispatch now lives inline in MSGFPlusOptions.effective*())
- ParamParser (legacy config-file reader; replaced by
MSGFPlusOptions.applyConfigFile)
- CaseInsensitiveLinkedHashMapParam, CaseInsensitiveMap (the
Parameter map that backed ParamManager)
Two small helper types (ParamObject interface + UserParam config-file
helper) are still consumed by msutil's runtime registries
(ActivationMethod, InstrumentType, Enzyme, Protocol). They have no
dependency on the rest of the params/ hierarchy, so they relocate
to edu.ucsd.msjava.msutil where their consumers already live.
Validation:
- Clean compile (main + tests) passes.
- Scoped sweep (TestDirectPinWriter, TestMSUtils, TestSA, TestMisc,
TestRunManifestWriter, SearchParamsTest, TestPercolator,
TestMinSpectraPerThread, TestPrecursorCalScaffolding,
TestCandidatePeptideGrid + ConsideringMetCleavage):
73 tests, 0 failures, 0 errors, 5 skipped.
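The legacy MSGFPlus CLI surface is fully preserved via the typed
picocli @Option fields + applyConfigFile()'s alias rewrites; this is
purely a maintainability cleanup that drops ~2,100 LOC of custom
parameter-parsing scaffolding. Besides the params/ classes listed
above, this commit also deletes the test class
src/test/java/msgfplus/TestMSLevelFiltering.java (76 lines; see the
diffstat).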
---
.../ucsd/msjava/msutil/ActivationMethod.java | 2 -
.../java/edu/ucsd/msjava/msutil/Enzyme.java | 2 -
.../ucsd/msjava/msutil/InstrumentType.java | 1 -
.../{params => msutil}/ParamObject.java | 2 +-
.../java/edu/ucsd/msjava/msutil/Peptide.java | 1 -
.../java/edu/ucsd/msjava/msutil/Protocol.java | 2 -
.../msjava/{params => msutil}/UserParam.java | 2 +-
.../CaseInsensitiveLinkedHashMapParam.java | 24 -
.../msjava/params/CaseInsensitiveMap.java | 24 -
.../ucsd/msjava/params/DoubleParameter.java | 39 -
.../edu/ucsd/msjava/params/EnumParameter.java | 83 --
.../ucsd/msjava/params/FileListParameter.java | 98 --
.../edu/ucsd/msjava/params/FileParameter.java | 145 ---
.../ucsd/msjava/params/FloatParameter.java | 34 -
.../msjava/params/FloatRangeParameter.java | 31 -
.../edu/ucsd/msjava/params/IntParameter.java | 59 -
.../ucsd/msjava/params/IntRangeParameter.java | 49 -
.../ucsd/msjava/params/NumberParameter.java | 69 --
.../msjava/params/ObjectEnumParameter.java | 29 -
.../edu/ucsd/msjava/params/ParamManager.java | 1059 -----------------
.../edu/ucsd/msjava/params/ParamParser.java | 86 --
.../edu/ucsd/msjava/params/Parameter.java | 81 --
.../ucsd/msjava/params/RangeParameter.java | 66 -
.../ucsd/msjava/params/StringParameter.java | 32 -
.../msjava/params/ToleranceParameter.java | 70 --
.../java/msgfplus/TestMSLevelFiltering.java | 76 --
26 files changed, 2 insertions(+), 2164 deletions(-)
rename src/main/java/edu/ucsd/msjava/{params => msutil}/ParamObject.java (68%)
rename src/main/java/edu/ucsd/msjava/{params => msutil}/UserParam.java (96%)
delete mode 100644 src/main/java/edu/ucsd/msjava/params/CaseInsensitiveLinkedHashMapParam.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/CaseInsensitiveMap.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/DoubleParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/EnumParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/FileListParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/FileParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/FloatParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/FloatRangeParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/IntParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/IntRangeParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/NumberParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/ObjectEnumParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/ParamManager.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/ParamParser.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/Parameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/RangeParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/StringParameter.java
delete mode 100644 src/main/java/edu/ucsd/msjava/params/ToleranceParameter.java
delete mode 100644 src/test/java/msgfplus/TestMSLevelFiltering.java
diff --git a/src/main/java/edu/ucsd/msjava/msutil/ActivationMethod.java b/src/main/java/edu/ucsd/msjava/msutil/ActivationMethod.java
index a691dfe9..eb050444 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/ActivationMethod.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/ActivationMethod.java
@@ -1,7 +1,5 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.params.ParamObject;
-import edu.ucsd.msjava.params.UserParam;
import java.io.File;
import java.nio.file.Paths;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/Enzyme.java b/src/main/java/edu/ucsd/msjava/msutil/Enzyme.java
index 1fea12bd..9dd9f26e 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/Enzyme.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/Enzyme.java
@@ -9,8 +9,6 @@
***************************************************************************/
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.params.ParamObject;
-import edu.ucsd.msjava.params.UserParam;
import java.io.File;
import java.nio.file.Paths;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/InstrumentType.java b/src/main/java/edu/ucsd/msjava/msutil/InstrumentType.java
index 18e23948..513ced47 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/InstrumentType.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/InstrumentType.java
@@ -1,6 +1,5 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.params.ParamObject;
import java.util.LinkedHashMap;
diff --git a/src/main/java/edu/ucsd/msjava/params/ParamObject.java b/src/main/java/edu/ucsd/msjava/msutil/ParamObject.java
similarity index 68%
rename from src/main/java/edu/ucsd/msjava/params/ParamObject.java
rename to src/main/java/edu/ucsd/msjava/msutil/ParamObject.java
index 200e8021..bcfd824d 100644
--- a/src/main/java/edu/ucsd/msjava/params/ParamObject.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/ParamObject.java
@@ -1,4 +1,4 @@
-package edu.ucsd.msjava.params;
+package edu.ucsd.msjava.msutil;
public interface ParamObject {
String getParamDescription();
diff --git a/src/main/java/edu/ucsd/msjava/msutil/Peptide.java b/src/main/java/edu/ucsd/msjava/msutil/Peptide.java
index 4102b1a2..cdcd91db 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/Peptide.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/Peptide.java
@@ -5,7 +5,6 @@
import edu.ucsd.msjava.msgf.MassListComparator;
import edu.ucsd.msjava.msgf.Tolerance;
import edu.ucsd.msjava.msutil.Modification.Location;
-import edu.ucsd.msjava.params.ParamManager;
import edu.ucsd.msjava.cli.MSGFPlus;
import java.nio.file.Path;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/Protocol.java b/src/main/java/edu/ucsd/msjava/msutil/Protocol.java
index cc1746f5..484431ba 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/Protocol.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/Protocol.java
@@ -1,7 +1,5 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.params.ParamObject;
-import edu.ucsd.msjava.params.UserParam;
import java.io.File;
import java.nio.file.Paths;
diff --git a/src/main/java/edu/ucsd/msjava/params/UserParam.java b/src/main/java/edu/ucsd/msjava/msutil/UserParam.java
similarity index 96%
rename from src/main/java/edu/ucsd/msjava/params/UserParam.java
rename to src/main/java/edu/ucsd/msjava/msutil/UserParam.java
index 9a02ae16..97903fbc 100644
--- a/src/main/java/edu/ucsd/msjava/params/UserParam.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/UserParam.java
@@ -1,4 +1,4 @@
-package edu.ucsd.msjava.params;
+package edu.ucsd.msjava.msutil;
import edu.ucsd.msjava.parser.BufferedLineReader;
diff --git a/src/main/java/edu/ucsd/msjava/params/CaseInsensitiveLinkedHashMapParam.java b/src/main/java/edu/ucsd/msjava/params/CaseInsensitiveLinkedHashMapParam.java
deleted file mode 100644
index 3d461e1d..00000000
--- a/src/main/java/edu/ucsd/msjava/params/CaseInsensitiveLinkedHashMapParam.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import java.util.LinkedHashMap;
-
-/**
- * Case insensitive LinkedHashMap (Key:String, Value:Parameter)
- * from https://stackoverflow.com/a/8237007/1179467
- */
-public class CaseInsensitiveLinkedHashMapParam extends LinkedHashMap {
-
- @Override
- public Parameter put(String key, Parameter value) {
- return super.put(key.toLowerCase(), value);
- }
-
- // not @Override because that would require the key parameter to be of type Object
- public Parameter get(String key) {
- return super.get(key.toLowerCase());
- }
-
- public boolean containsKey(String key) {
- return super.containsKey(key.toLowerCase());
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/CaseInsensitiveMap.java b/src/main/java/edu/ucsd/msjava/params/CaseInsensitiveMap.java
deleted file mode 100644
index a9873297..00000000
--- a/src/main/java/edu/ucsd/msjava/params/CaseInsensitiveMap.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import java.util.HashMap;
-
-/**
- * Case insensitive HashMap (Key:String, Value:String)
- * from https://stackoverflow.com/a/8237007/1179467
- */
-public class CaseInsensitiveMap extends HashMap {
-
- @Override
- public String put(String key, String value) {
- return super.put(key.toLowerCase(), value);
- }
-
- // not @Override because that would require the key parameter to be of type Object
- public String get(String key) {
- return super.get(key.toLowerCase());
- }
-
- public boolean containsKey(String key) {
- return super.containsKey(key.toLowerCase());
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/DoubleParameter.java b/src/main/java/edu/ucsd/msjava/params/DoubleParameter.java
deleted file mode 100644
index d11b63ee..00000000
--- a/src/main/java/edu/ucsd/msjava/params/DoubleParameter.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public class DoubleParameter extends NumberParameter {
-
- public DoubleParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo.getKey(), paramInfo.getName(), paramInfo.getDescription());
- setAdditionalDescription(paramInfo.getAdditionalDescription());
- }
-
- public DoubleParameter(String key, String name, String description) {
- super(key, name, description);
- super.minValue = Double.NEGATIVE_INFINITY;
- super.maxValue = Double.POSITIVE_INFINITY;
- }
-
- @Override
- public String parse(String value) {
- try {
- // When parsing the value, look for and remove any trailing exclamation marks
- super.value = Double.valueOf(trimTrailingChars(value, "!"));
-
- if (minValue == null)
- minValue = Double.NEGATIVE_INFINITY;
-
- if (maxValue == null)
- maxValue = Double.POSITIVE_INFINITY;
-
- String range = getValidRange();
- if (this.value < minValue || this.value > maxValue ||
- !isMinInclusive && this.value.equals(minValue) ||
- !isMaxInclusive && this.value.equals(maxValue)) {
- return "must be in the range " + range;
- }
- } catch (NumberFormatException e) {
- return "must be a double";
- }
- return null;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/EnumParameter.java b/src/main/java/edu/ucsd/msjava/params/EnumParameter.java
deleted file mode 100644
index 70cb595c..00000000
--- a/src/main/java/edu/ucsd/msjava/params/EnumParameter.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import java.util.ArrayList;
-
-public class EnumParameter extends IntParameter {
-
- private int defaultValue = Integer.MIN_VALUE;
- private ArrayList descriptions = new ArrayList();
-
- public EnumParameter(String key) {
- super(key, null, null);
- super.minValue(0);
- }
-
- public EnumParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo);
- super.minValue(0);
- }
-
- public EnumParameter setMinIndex(int minIndex) {
- super.minValue(minIndex);
- return this;
- }
-
- public EnumParameter registerEntry(String description) {
- descriptions.add(description);
- return this;
- }
-
- public EnumParameter setDefault() {
- this.defaultValue = getMinValue() + descriptions.size() - 1;
- super.defaultValue(defaultValue);
- return this;
- }
-
- protected int getCurIndex() {
- return getMinValue() + descriptions.size();
- }
-
- protected int getMinValue() {
- if (super.minValue == null)
- return 0;
- else
- return super.minValue;
- }
-
- @Override
- public String getName() {
- if (super.getName() != null)
- return super.getName();
- StringBuffer buf = new StringBuffer();
- for (int i = super.minValue; i < getMinValue() + descriptions.size(); i++) {
- if (i > getMinValue())
- buf.append("/");
- buf.append(i);
- }
- return buf.toString();
- }
-
- @Override
- public String getDescription() {
- StringBuffer buf = new StringBuffer();
- if (super.getDescription() != null) {
- buf.append(super.getDescription() + ", ");
- buf.append("Default: " + this.defaultValue);
- return buf.toString();
- }
- for (int i = super.minValue; i < getMinValue() + descriptions.size(); i++) {
- if (i > getMinValue())
- buf.append(", ");
- buf.append(i + ": " + descriptions.get(i - getMinValue()));
- if (i == defaultValue)
- buf.append(" (Default)");
- }
- return buf.toString();
- }
-
- @Override
- public String parse(String value) {
- super.maxValue(getMinValue() + descriptions.size());
- return super.parse(value);
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/FileListParameter.java b/src/main/java/edu/ucsd/msjava/params/FileListParameter.java
deleted file mode 100644
index b88b8320..00000000
--- a/src/main/java/edu/ucsd/msjava/params/FileListParameter.java
+++ /dev/null
@@ -1,98 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import edu.ucsd.msjava.msutil.FileFormat;
-
-import java.io.File;
-import java.util.ArrayList;
-
-public class FileListParameter extends Parameter {
-
- private ArrayList fileFormats = new ArrayList();
-
- private File[] files;
- private FileFormat[] fileFormatArr;
-
- public FileListParameter(String key, String name, String description) {
- super(key, name, description);
- }
-
- public FileListParameter setAsOptional() {
- super.setOptional();
- return this;
- }
-
- public FileListParameter addFileFormat(FileFormat fileFormat) {
- fileFormats.add(fileFormat);
- return this;
- }
-
- @Override
- public String parse(String value) {
- File path = new File(value);
-
- File[] dirFiles;
- if (!path.isDirectory()) {
- if (!path.exists())
- return "File does not exist";
- dirFiles = new File[1];
- dirFiles[0] = path;
- } else
- dirFiles = path.listFiles();
-
- ArrayList fileList = new ArrayList();
- ArrayList fileFormatList = new ArrayList();
- for (File f : dirFiles) {
- if (fileFormats.isEmpty()) {
- fileList.add(f);
- } else {
- FileFormat matchedFormat = null;
- String fileName = f.getName();
-
- for (FileFormat format : fileFormats) {
- if (!format.isCaseSensitive())
- fileName = fileName.toLowerCase();
- for (String suffix : format.getSuffixes()) {
- if (!format.isCaseSensitive())
- suffix = suffix.toLowerCase();
- if (fileName.endsWith(suffix)) {
- matchedFormat = format;
- break;
- }
- }
- }
- if (matchedFormat != null) {
- fileList.add(f);
- fileFormatList.add(matchedFormat);
- }
- }
- }
- if (fileList.size() == 0) {
- return "no file exists with the given extensions";
- }
-
- files = fileList.toArray(new File[0]);
- fileFormatArr = fileFormatList.toArray(new FileFormat[0]);
- return null;
- }
-
- public File[] getFiles() {
- return files;
- }
-
- public FileFormat[] getFileFormats() {
- return fileFormatArr;
- }
-
- @Override
- public String getValueAsString() {
- if (files == null)
- return null;
- StringBuffer output = new StringBuffer();
- if (files.length == 0)
- return output.toString();
- output.append(files[0].getPath());
- for (int i = 1; i < files.length; i++)
- output.append("," + files[i].getPath());
- return output.toString();
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/FileParameter.java b/src/main/java/edu/ucsd/msjava/params/FileParameter.java
deleted file mode 100644
index c27d6531..00000000
--- a/src/main/java/edu/ucsd/msjava/params/FileParameter.java
+++ /dev/null
@@ -1,145 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import edu.ucsd.msjava.msutil.FileFormat;
-import org.apache.commons.lang3.StringUtils;
-
-import java.io.File;
-import java.util.ArrayList;
-
-public class FileParameter extends Parameter {
-
- private boolean mustExist = false;
- private boolean mustNotExist = false;
-
- private boolean mustBeADirectory = false;
- private boolean mustBeAFile = false;
-
- private ArrayList fileFormats = new ArrayList<>(); // available file format; if empty, all files are allowed.
-
- private File file;
- private FileFormat fileFormat;
-
- public FileParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo.getKey(), paramInfo.getName(), paramInfo.getDescription());
- setAdditionalDescription(paramInfo.getAdditionalDescription());
- }
-
- public FileParameter(String key, String name, String description) {
- super(key, name, description);
- }
-
-
- public FileParameter setAsOptional() {
- super.setOptional();
- return this;
- }
-
- public FileParameter fileMustExist() {
- this.mustExist = true;
- return this;
- }
-
- public FileParameter fileMustNotExist() {
- this.mustNotExist = true;
- return this;
- }
-
- public FileParameter mustBeADirectory() {
- this.mustBeADirectory = true;
- return this;
- }
-
- public FileParameter mustBeAFile() {
- this.mustBeAFile = true;
- return this;
- }
-
- public FileParameter addFileFormat(FileFormat fileFormat) {
- fileFormats.add(fileFormat);
- return this;
- }
-
- public boolean isSupported(FileFormat fileFormat) {
- if (fileFormats == null)
- return false;
- else
- return fileFormats.contains(fileFormat);
- }
-
- @Override
- public String parse(String value) {
- File path = new File(value);
-
- if (path.isDirectory()) {
- if (this.mustBeAFile)
- return "must not be a directory";
- } else // path is a file
- {
- if (this.mustBeADirectory)
- return "must be a directory";
- }
-
- if (!fileFormats.isEmpty()) {
- if (path.isDirectory() && fileFormats.contains(FileFormat.DIRECTORY)) {
- this.fileFormat = FileFormat.DIRECTORY;
- } else {
- this.fileFormat = null;
- String fileName = path.getName();
-
- for (FileFormat format : fileFormats) {
- if (!format.isCaseSensitive())
- fileName = fileName.toLowerCase();
-
- for (String suffix : format.getSuffixes()) {
- if (!format.isCaseSensitive())
- suffix = suffix.toLowerCase();
- if (fileName.endsWith(suffix)) {
- this.fileFormat = format;
- break;
- }
- }
- }
- }
-
- if (this.fileFormat == null) {
- ArrayList knownFileExtensions = new ArrayList();
- for (FileFormat format : fileFormats) {
- if (format == FileFormat.DIRECTORY)
- continue;
-
- for (String suffix : format.getSuffixes()) {
- knownFileExtensions.add(suffix);
- }
- }
-
- return "extension does not match a known file type: " +
- StringUtils.join(knownFileExtensions, ", ");
- }
- }
-
- if (this.mustExist && !path.exists())
- return "file does not exist";
-
- if (this.mustNotExist && path.exists())
- return "file already exists";
-
- this.file = path;
-
- return null;
- }
-
- public File getFile() {
- return file;
- }
-
- public FileFormat getFileFormat() {
- return fileFormat;
- }
-
- @Override
- public String getValueAsString() {
- if (file == null)
- return null;
- return file.getPath();
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/FloatParameter.java b/src/main/java/edu/ucsd/msjava/params/FloatParameter.java
deleted file mode 100644
index c2ee6ad5..00000000
--- a/src/main/java/edu/ucsd/msjava/params/FloatParameter.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public class FloatParameter extends NumberParameter {
-
- public FloatParameter(String key, String name, String description) {
- super(key, name, description);
- super.minValue = Float.NEGATIVE_INFINITY;
- super.maxValue = Float.POSITIVE_INFINITY;
- }
-
- @Override
- public String parse(String value) {
- try {
- // When parsing the value, look for and remove any trailing exclamation marks
- super.value = Float.valueOf(trimTrailingChars(value, "!"));
-
- if (minValue == null)
- minValue = Float.NEGATIVE_INFINITY;
-
- if (maxValue == null)
- maxValue = Float.POSITIVE_INFINITY;
-
- String range = getValidRange();
- if (this.value < minValue || this.value > maxValue ||
- !isMinInclusive && this.value.equals(minValue) ||
- !isMaxInclusive && this.value.equals(maxValue)) {
- return "must be in the range " + range;
- }
- } catch (NumberFormatException e) {
- return "must be a float";
- }
- return null;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/FloatRangeParameter.java b/src/main/java/edu/ucsd/msjava/params/FloatRangeParameter.java
deleted file mode 100644
index 5d9c8bca..00000000
--- a/src/main/java/edu/ucsd/msjava/params/FloatRangeParameter.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public class FloatRangeParameter extends RangeParameter {
- public FloatRangeParameter(String key, String name, String description) {
- super(key, name, description);
- super.minValue = Float.MIN_VALUE;
- super.maxValue = Float.MAX_VALUE;
- super.isMinInclusive = true;
- super.isMaxInclusive = false;
- }
-
- @Override
- public String parse(String value) {
- String[] token = value.split(",");
- try {
- if (token.length == 2) {
- min = Float.parseFloat(token[0]);
- max = Float.parseFloat(token[1]);
- } else {
- return "illegar syntax";
- }
- } catch (NumberFormatException e) {
- return "not a valid float or float range";
- }
-
- if (min >= max || !isValueValid(min) || !isValueValid(max)) {
- return "not a valid range";
- }
- return null;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/IntParameter.java b/src/main/java/edu/ucsd/msjava/params/IntParameter.java
deleted file mode 100644
index 20c1f7e5..00000000
--- a/src/main/java/edu/ucsd/msjava/params/IntParameter.java
+++ /dev/null
@@ -1,59 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public class IntParameter extends NumberParameter {
-
- public IntParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo.getKey(), paramInfo.getName(), paramInfo.getDescription());
- setAdditionalDescription(paramInfo.getAdditionalDescription());
- }
-
- public IntParameter(String key, String name, String description) {
- super(key, name, description);
- super.minValue = 0;
- super.maxValue = Integer.MAX_VALUE;
- }
-
- @Override
- public String parse(String value) {
- try {
- // When parsing the value, look for and remove any trailing exclamation marks
- // Some DMS config files use a trailing exclamation mark to indicate that a value should not be changed
- super.value = Integer.valueOf(trimTrailingChars(value, "!"));
-
- if (this.value == null) {
- return "Value cannot be null";
- }
-
- if (minValue == null && maxValue == null) {
- // Skip the range check
- return null;
- }
-
- if (minValue == null) {
- minValue = Integer.MIN_VALUE;
- }
-
- if (maxValue == null) {
- maxValue = Integer.MAX_VALUE;
- }
-
- String range = getValidRange();
- if (this.value < minValue || this.value > maxValue ||
- !super.isMinInclusive && this.value.equals(minValue) ||
- !super.isMaxInclusive && this.value.equals(maxValue)) {
-
- if (super.isMinInclusive && super.isMaxInclusive)
- return "must be in the range " + minValue + " to " + maxValue;
- else if (super.isMinInclusive)
- return "must be in the range " + minValue + " to " + (maxValue - 1);
- else if (!super.isMinInclusive && super.isMaxInclusive)
- return "must be in the range " + (minValue + 1) + " to " + maxValue;
- else
- return "must be in the range " + range;
- }
- } catch (NumberFormatException e) {
- return "must be an integer";
- }
- return null;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/IntRangeParameter.java b/src/main/java/edu/ucsd/msjava/params/IntRangeParameter.java
deleted file mode 100644
index 309ae0ca..00000000
--- a/src/main/java/edu/ucsd/msjava/params/IntRangeParameter.java
+++ /dev/null
@@ -1,49 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public class IntRangeParameter extends RangeParameter {
-
- public IntRangeParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo.getKey(), paramInfo.getName(), paramInfo.getDescription());
- setAdditionalDescription(paramInfo.getAdditionalDescription());
- }
-
- public IntRangeParameter(String key, String name, String description) {
- super(key, name, description);
- super.minValue = Integer.MIN_VALUE;
- super.maxValue = Integer.MAX_VALUE;
- super.isMinInclusive = true;
- super.isMaxInclusive = false;
- }
-
- @Override
- public String parse(String value) {
- String[] token = value.split(",");
- try {
- if (token.length == 1) {
- min = Integer.parseInt(token[0]);
- max = min;
- } else if (token.length == 2) {
- min = Integer.parseInt(token[0]);
- max = Integer.parseInt(token[1]);
- } else {
- return "illegal syntax";
- }
- } catch (NumberFormatException e) {
- return "not a valid integer or integer range";
- }
-
- int minNumber = isMinInclusive ? min : min + 1;
- int maxNumber = isMaxInclusive ? max : max - 1;
-
- if (minNumber > maxNumber) {
- return "not a valid range";
- }
-
-// if(value.compareTo(minValue) < 0 || value.compareTo(maxValue) > 0
-// || !isMinInclusive && value.equals(minValue)
-// || !isMaxInclusive && value.equals(maxValue))
-// return false;
-
- return null;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/NumberParameter.java b/src/main/java/edu/ucsd/msjava/params/NumberParameter.java
deleted file mode 100644
index adcecf52..00000000
--- a/src/main/java/edu/ucsd/msjava/params/NumberParameter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import org.apache.commons.lang3.StringUtils;
-
-public abstract class NumberParameter extends Parameter {
- protected T value;
-
- protected T minValue; // default: inclusive
- protected T maxValue; // default: exclusive
- protected boolean isMinInclusive = true;
- protected boolean isMaxInclusive = false;
-
- public NumberParameter(String key, String name, String description) {
- super(key, name, description);
- }
-
- public NumberParameter defaultValue(T defaultValue) {
- value = defaultValue;
- super.setOptional();
- return this;
- }
-
- public NumberParameter minValue(T minValue) {
- this.minValue = minValue;
- return this;
- }
-
- public NumberParameter maxValue(T maxValue) {
- this.maxValue = maxValue;
- return this;
- }
-
- public NumberParameter setMinExclusive() {
- this.isMinInclusive = false;
- return this;
- }
-
- public NumberParameter setMaxInclusive() {
- this.isMaxInclusive = true;
- return this;
- }
-
- protected String getValidRange() {
- return (isMinInclusive ? "[" : "(") + minValue + "," + maxValue + (isMaxInclusive ? "]" : ")");
- }
-
- @Override
- public abstract String parse(String value);
-
- @Override
- public String getValueAsString() {
- return String.valueOf(value);
- }
-
- /**
- * Remove the specified characters from the end of the value
- * @param value
- * @param stripChars
- * @return
- */
- public String trimTrailingChars(String value, String stripChars)
- {
- return StringUtils.stripEnd(value, stripChars);
- }
-
- public T getValue() {
- return value;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/ObjectEnumParameter.java b/src/main/java/edu/ucsd/msjava/params/ObjectEnumParameter.java
deleted file mode 100644
index fad416ee..00000000
--- a/src/main/java/edu/ucsd/msjava/params/ObjectEnumParameter.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import java.util.ArrayList;
-
-public class ObjectEnumParameter extends EnumParameter {
-
- private ArrayList objectList = new ArrayList();
-
- public ObjectEnumParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo);
- setAdditionalDescription(paramInfo.getAdditionalDescription());
- }
-
- public ObjectEnumParameter registerObject(T obj) {
- super.registerEntry(obj.getParamDescription());
- objectList.add(obj);
- return this;
- }
-
- public T getObject() {
- int value = getValue();
- return objectList.get(value - minValue);
- }
-
- @Override
- public String getValueAsString() {
- return getObject().getParamDescription();
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/ParamManager.java b/src/main/java/edu/ucsd/msjava/params/ParamManager.java
deleted file mode 100644
index c5f606f3..00000000
--- a/src/main/java/edu/ucsd/msjava/params/ParamManager.java
+++ /dev/null
@@ -1,1059 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import edu.ucsd.msjava.msutil.*;
-import edu.ucsd.msjava.sequences.Constants;
-import edu.ucsd.msjava.cli.MSGFPlus;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-public class ParamManager {
-
- /**
- * Keys in this HashMap are the parameter key (typically the command line names), values are the parameter definition
- */
- private CaseInsensitiveLinkedHashMapParam params;
-
- private String toolName;
- private String version;
- private String date;
- private String command;
- private ArrayList examples = new ArrayList<>();
-
- public enum ParamNameEnum {
-
- CONFIGURATION_FILE("conf", "ConfigurationFile",
- "Configuration file path; options specified at the command line will override settings in the config file",
- "Example parameter file is at https://github.com/MSGFPlus/msgfplus/blob/master/docs/examples/MSGFPlus_Params.txt"),
-
- SPECTRUM_FILE("s", "SpectrumFile", "*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt",
- "Spectra should be centroided (see below for MSConvert example). Profile spectra will be ignored."),
-
- DB_FILE("d", "DatabaseFile", "*.fasta or *.fa or *.faa", null),
-
- DECOY_PREFIX("decoy", "DecoyPrefix",
- "Prefix for decoy protein names; Default: " + MSGFPlus.DEFAULT_DECOY_PROTEIN_PREFIX, null),
-
- // -o for MS-GF+ search output
- SEARCH_OUTPUT_FILE("o", "OutputFile (*.pin or *.tsv)", "Default: [SpectrumFileName].pin", null),
-
- PRECURSOR_MASS_TOLERANCE("t", "PrecursorMassTolerance", "e.g. 2.5Da, 20ppm or 0.5Da,2.5Da; Default: 20ppm",
- "Use a comma to define asymmetric values. E.g. \"-t 0.5Da,2.5Da\" will set 0.5Da to the left (ObsMass < TheoMass) and 2.5Da to the right (ObsMass > TheoMass)"),
-
- PRECURSOR_MASS_TOLERANCE_UNITS("u", "PrecursorMassToleranceUnits", "Units for the precursor mass tolerance; only useful if you do not include units in the PrecursorMassTolerance specification",
- "0 means Ds\n" +
- "\t 1 means ppm\n" +
- "\t 2 means use units specified by the PrecursorMassTolerance (Default)"),
-
- // aka Activation method
- FRAG_METHOD("m", "FragmentationMethodID", "Fragmentation Method",
- "0 means as written in the spectrum or CID if no info (Default)\n" +
- "\t 1 means CID\n" +
- "\t 2 means ETD\n" +
- "\t 3 means HCD"),
-
- INSTRUMENT_TYPE("inst", "InstrumentID", null, null),
-
- ENZYME_ID("e", "EnzymeID", null, null),
-
- PROTOCOL_ID("protocol", "ProtocolID", null, null),
-
- MOD_FILE("mod", "ModificationFileName", "Modification file; Default: standard amino acids with fixed C+57; only if -mod is not specified", null),
-
- NUM_THREADS("thread", "NumThreads", "Number of concurrent threads to be executed; Default: Number of available cores",
- "This is best set to the number of physical cores in a single NUMA node.\n" +
- "\t Generally a single NUMA node is 1 physical processor.\n" +
- "\t The default will try to use hyperthreading cores, which can increase the amount of time this process will take.\n" +
- "\t This is because the part of Scoring param generation that is multithreaded is also I/O intensive."),
-
- NUM_TASKS("tasks", "NumTasks", "Override the number of tasks to use on the threads; Default: (internally calculated based on inputs)",
- "More tasks than threads will reduce the memory requirements of the search, but will be slower (how much depends on the inputs).\n" +
- "\t 1 <= tasks <= numThreads: will create one task per thread, which is the original behavior.\n" +
- "\t tasks = 0: use default calculation - minimum of: (threads*3) and (numSpectra/minSpectraPerThread).\n" +
- "\t tasks < 0: multiply number of threads by abs(tasks) to determine number of tasks (i.e., -2 means \"2 * numThreads\" tasks).\n" +
- "\t One task per thread will use the most memory, but will usually finish the fastest.\n" +
- "\t 2-3 tasks per thread will use comparably less memory, but may cause the search to take 1.5 to 2 times as long."),
-
- MIN_SPECTRA_PER_THREAD("minSpectraPerThread", "MinSpectraPerThread",
- "Minimum number of spectra to assign per thread/task; Default: 250",
- "Controls the per-thread workload floor used when auto-selecting numThreads and numTasks.\n" +
- "\t The effective thread count is capped at max(1, round(numSpectra / minSpectraPerThread)).\n" +
- "\t Lower this value to raise parallelism on small inputs running on many-core hosts\n" +
- "\t (e.g. set to 50 when searching ~1,000 spectra on a 20-core machine).\n" +
- "\t Going too low increases per-thread setup overhead and can slow the search."),
-
- // Used by MS-GF+
- ISOTOPE_ERROR("ti", "IsotopeErrorRange", "Range of allowed isotope peak errors; Default: 0,1",
- "Takes into account the error introduced by choosing a non-monoisotopic peak for fragmentation.\n" +
- "\t The combination of -t and -ti determines the precursor mass tolerance.\n" +
- "\t E.g. \"-t 20ppm -ti -1,2\" tests abs(ObservedPepMass - TheoreticalPepMass - n * 1.00335Da) < 20ppm for n = -1, 0, 1, 2."),
-
- ENZYME_SPECIFICITY("ntt", "NTT", "Number of Tolerable Termini",
- "E.g. For trypsin, 0: non-tryptic, 1: semi-tryptic, 2: fully-tryptic peptides only."),
-
- MIN_PEPTIDE_LENGTH("minLength", "MinPepLength", "Minimum peptide length to consider; Default: 6", null),
- MAX_PEPTIDE_LENGTH("maxLength", "MaxPepLength", "Maximum peptide length to consider; Default: 40", null),
-
- MIN_CHARGE("minCharge", "MinCharge", "Minimum precursor charge to consider if charges are not specified in the spectrum file; Default: 2", null),
- MAX_CHARGE("maxCharge", "MaxCharge", "Maximum precursor charge to consider if charges are not specified in the spectrum file; Default: 3", null),
-
- NUM_MATCHES_SPEC("n", "NumMatchesPerSpec", "Number of matches per spectrum to be reported; Default: 1", null),
-
- CHARGE_CARRIER_MASSES("ccm", "ChargeCarrierMass", "Mass of charge carrier; Default: mass of proton (1.00727649)", null),
-
- MIN_NUM_PEAKS("minNumPeaks", "MinNumPeaksPerSpectrum", "Minimum number of peaks per spectrum; Default: " + Constants.MIN_NUM_PEAKS_PER_SPECTRUM, null),
-
- NUM_ISOFORMS("iso", "NumIsoforms", "Number of isoforms to consider per peptide; Default: " + Constants.NUM_VARIANTS_PER_PEPTIDE, null),
-
- IGNORE_MET_CLEAVAGE("ignoreMetCleavage", "IgnoreMetCleavage", "When 1, ignore N-terminal methionine cleavage",
- "0 means to consider protein N-term Met cleavage (Default)\n" +
- "\t 1 means to ignore protein N-term Met cleavage"),
-
- MIN_DE_NOVO_SCORE("minDeNovoScore", "MinDeNovoScore", "Minimum de Novo score; Default: " + Constants.MIN_DE_NOVO_SCORE, null),
-
- SPEC_INDEX("index", "SpecIndex", "Range of spectrum indices to be considered",
- "For example, to analyze the first 1000 spectra use -index 1,1000"),
-
- MS_LEVEL("msLevel", "MSLevel", "MS level or range of MS levels to consider; Default: 2",
- "Accepts a single value or a comma-separated range.\n" +
- "\t For example, -msLevel 2 to search only MS2 spectra\n" +
- "\t Or -msLevel 2,3 to search both MS2 and MS3 spectra"),
-
- MAX_MISSED_CLEAVAGES("maxMissedCleavages", "MaxMissedCleavages", "Exclude peptides with more than this number of missed cleavages from the search; Default: -1 (no limit)", null),
-
- TDA_STRATEGY("tda", "TDA", "Target decoy strategy",
- "0 means Don't search decoy database (Default)\n" +
- "\t 1 means search the decoy database (forward + reverse proteins)"),
-
- ADD_FEATURES("addFeatures", "AddFeatures", "Include additional features in the output (enable this to post-process results with Percolator)",
- "0 means Output basic scores only (Default)\n" +
- "\t 1 means Output additional features"),
-
- ALLOW_DENSE_CENTROIDED_PEAKS("allowDenseCentroidedPeaks", "AllowDenseCentroidedPeaks", "Allow centroid scans with dense peaks (Default: 0)\n" +
- "\t (for mzML or mzXML files, the console output will tell you if you might want to use this)", null),
-
- DD_DIRECTORY("dd", "DBIndexDir", "Path to the directory containing database index files", null),
-
- EDGE_SCORE("edgeScore", "EdgeScore", "Toggle edge scoring",
- "0 means Use Edge Scoring (Default)\n" +
- "\t 1 means Do not use edge scoring"),
-
- MAX_NUM_MODS("numMods", "NumMods", "Maximum number of dynamic (variable) modifications per peptide; Default: 3", null),
-
- // Note that static and dynamic modifications cannot be specified at the command line
- // Use -mod or -conf
- STATIC_MODIFICATION("staticMod", "StaticMod", "Static/Fixed modification", null),
-
- DYNAMIC_MODIFICATION("dynamicMod", "DynamicMod", "Dynamic/Variable modification", null),
-
- CUSTOM_AA("customAA", "CustomAA", "Custom amino acid", null),
-
- VERBOSE("verbose", null, "Console output message verbosity",
- "0 means Report total progress only\n" +
- "\t 1 means Report total and per-thread progress/status"),
-
- OUTPUT_FORMAT("outputFormat", "OutputFormat", "Output format for search results; Default: pin",
- "pin: Write Percolator .pin format directly (default; feeds into Percolator for rescoring)\n" +
- "\t tsv: Write TSV directly (faster, smaller files, compatible with OpenMS MSGFPlusAdapter)"),
-
- PRECURSOR_CAL("precursorCal", "PrecursorCal", "Precursor mass calibration mode; Default: auto",
- "auto: Run a quick pre-pass and apply a per-file ppm shift only when >= 200 confident PSMs are collected (default)\n" +
- "\t on: Always apply the learned shift, even if fewer PSMs are collected\n" +
- "\t off: Skip calibration entirely (bit-identical to builds without the flag)");
-
- private String key;
- private String name;
- private String description;
- private String additionalDescription;
-
- ParamNameEnum(String key, String name, String description, String additionalDescription) {
- this.key = key;
- this.name = name;
- this.description = description;
- this.additionalDescription = additionalDescription;
- }
-
- /**
- * Parameter key; defines the command line argument for this parameter
- * @return
- */
- public String getKey() {
- return key;
- }
-
- /**
- * Parameter name when used in a configuration file
- * @return
- */
- public String getName() {
- return name;
- }
-
- /**
- * Parameter description
- * @return
- */
- public String getDescription() {
- return description;
- }
-
- /**
- * Additional description
- * @return
- */
- public String getAdditionalDescription() {
- return additionalDescription;
- }
-
- /**
- * Check whether the parameter line matches this parameter's name
- * @param paramName Parameter name from the config file
- * @return True if it matches the parameter name of this class (more specifically, of a class that inherits from this class)
- */
- public boolean isThisParam(String paramName) {
- return ((getName() != null && paramName.equalsIgnoreCase(getName())));
- }
-
- public static String getParamNameFromLine(String lineSetting) {
- String[] lineParts = lineSetting.split("=");
- if (lineParts.length < 2)
- return "";
-
- String paramName = lineParts[0].trim();
-
- // Auto-update some names to change from abbreviations / alternate names to the standard name
- if (paramName.equalsIgnoreCase("IsotopeError")) {
- paramName = "IsotopeErrorRange";
- } else if (paramName.equalsIgnoreCase("TargetDecoyAnalysis")) {
- paramName = "TDA";
- } else if (paramName.equalsIgnoreCase("FragmentationMethod")) {
- paramName = "FragmentationMethodID";
- } else if (paramName.equalsIgnoreCase("Instrument")) {
- paramName = "InstrumentID";
- } else if (paramName.equalsIgnoreCase("Enzyme")) {
- paramName = "EnzymeID";
- } else if (paramName.equalsIgnoreCase("Protocol")) {
- paramName = "ProtocolID";
- } else if (paramName.equalsIgnoreCase("NumTolerableTermini")) {
- paramName = "NTT";
- } else if (paramName.equalsIgnoreCase("MinNumPeaks")) {
- paramName = "MinNumPeaksPerSpectrum";
- } else if (paramName.equalsIgnoreCase("MaxNumMods") || paramName.equalsIgnoreCase("MaxNumModsPerPeptide")) {
- paramName = "NumMods";
- } else if (paramName.equalsIgnoreCase("minLength") || paramName.equalsIgnoreCase("MinPeptideLength")) {
- paramName = "MinPepLength";
- } else if (paramName.equalsIgnoreCase("maxLength") || paramName.equalsIgnoreCase("MaxPeptideLength")) {
- paramName = "MaxPepLength";
- } else if (paramName.equalsIgnoreCase("PMTolerance") || paramName.equalsIgnoreCase("ParentMassTolerance")) {
- paramName = "PrecursorMassTolerance";
- }
-
- return paramName;
- }
- }
-
- public ParamManager(String toolName, String version, String date, String command) {
- this.toolName = toolName;
- this.version = version;
- this.date = date;
- this.command = command;
- params = new CaseInsensitiveLinkedHashMapParam();
- }
-
- public boolean addParameter(Parameter param) {
- if (params.containsKey(param.getKey())) {
- System.err.println("ParamManager: duplicate key (" + param.getKey() + ")");
- System.exit(-1);
- }
- params.put(param.getKey(), param);
- return true;
- }
-
- private void addExample(String example) {
- this.examples.add(example);
- }
-
- public Parameter getParameter(String key) {
- return params.get(key);
- }
-
- /**
- * Validates that required parameters are defined
- * @return Error message if an error, otherwise null
- */
- public String isValid() {
- Iterator> itr = params.entrySet().iterator();
- while (itr.hasNext()) {
- Entry entry = itr.next();
- Parameter param = entry.getValue();
- if (!param.isValid()) {
- return "Parameter -" + param.getKey() + " (" + param.getName() + ") is missing";
- }
- }
- return null;
- }
-
- public void printToolInfo() {
- System.out.println(this.toolName + " " + this.version + " (" + this.date + ")");
- }
-
- public void printJVMInfo() {
- System.out.println("Java " + System.getProperty("java.version") + " (" + System.getProperty("java.vendor") + ")");
- System.out.println(System.getProperty("os.name") + " (" + System.getProperty("os.arch") + ", version " + System.getProperty("os.version") + ")");
- }
-
- public void printUsageInfo() {
- System.out.println();
- System.out.println(this.toolName + " " + this.version + " (" + this.date + ")");
- System.out.println();
- System.out.println("Usage: " + this.command);
-
- ArrayList optParams = new ArrayList<>();
- Iterator> itr = params.entrySet().iterator();
- while (itr.hasNext()) {
- Entry entry = itr.next();
- Parameter param = entry.getValue();
- if (!param.isHidden()) {
- if (!param.isOptional()) {
- System.out.println("\t" + param);
- if (param.getAdditionalDescription() != null)
- System.out.println("\t " + param.getAdditionalDescription());
- } else {
- optParams.add(param);
- }
- }
- }
-
- for (Parameter param : optParams) {
- System.out.println("\t" + param);
- if (param.getAdditionalDescription() != null)
- System.out.println("\t " + param.getAdditionalDescription());
- }
-
- System.out.println();
- for (String example : examples)
- System.out.println(example);
-
- System.out.println();
- System.out.println("For Thermo .raw files, obtain a centroided .mzML file using MSConvert, which is part of ProteoWizard (http://proteowizard.sourceforge.net/)");
- System.out.println(" MSConvert.exe DatasetName.raw --filter \"peakPicking true 1-\" --mzML --32");
- System.out.println();
- System.out.println("To add or override the enzyme definitions, create a file named enzymes.txt in a directory named params below the working directory.");
- System.out.println("For example, create file C:\\Work\\params\\enzymes.txt when the working directory is C:\\Work");
- System.out.println("Example enzymes.txt file: https://github.com/MSGFPlus/msgfplus/blob/master/docs/examples/enzymes.txt");
- System.out.println();
- System.out.println("Documentation: https://msgfplus.github.io/msgfplus/");
- System.out.println("Releases: https://github.com/MSGFPlus/msgfplus/releases");
- }
-
- public void printValues() {
- Iterator> itr = params.entrySet().iterator();
- while (itr.hasNext()) {
- Entry entry = itr.next();
- Parameter param = entry.getValue();
- System.out.println(param.getKey() + "\t" + param.getValueAsString());
- }
- }
-
- public String parseParams(String argv[]) {
- if (argv.length == 0) {
- return "No parameter specified.";
- }
-
- if (argv.length < 2 || argv.length % 2 != 0) {
- return "The number of parameters must be even. If a file path has a space, surround it with double quotes.";
- }
-
- for (int i = 0; i < argv.length; i += 2) {
- if (!argv[i].startsWith("-") || i + 1 >= argv.length || argv[i].length() <= 1) {
- return "Syntax error; parameter names must start with a dash: " + argv[i];
- } else {
- String key = argv[i].substring(1);
- Parameter param = params.get(key);
- if (param == null) {
- return "Invalid parameter: " + argv[i] + ".";
- } else {
- String error = param.parse(argv[i + 1]);
- if (error != null) {
- String err = "Invalid value for parameter " + argv[i] + ": " + argv[i + 1];
- err += "\n (" + error + ")";
- return err;
- }
- param.setValueAssigned();
- }
- }
- }
-
- String error = isValid();
- if (error != null)
- return error;
-
- return null;
- }
-
- public void addSpecFileParam(boolean isOptional) {
- FileParameter specFileParam = new FileParameter(ParamNameEnum.SPECTRUM_FILE);
- if (isOptional) {
- specFileParam.setAsOptional();
- }
- specFileParam.addFileFormat(SpecFileFormat.MZML);
- specFileParam.addFileFormat(SpecFileFormat.MGF);
- specFileParam.addFileFormat(SpecFileFormat.MS2);
- specFileParam.addFileFormat(SpecFileFormat.PKL);
- specFileParam.addFileFormat(SpecFileFormat.DTA_TXT);
- specFileParam.addFileFormat(FileFormat.DIRECTORY);
- specFileParam.fileMustExist();
- specFileParam.setAdditionalDescription(ParamNameEnum.SPECTRUM_FILE.additionalDescription);
- addParameter(specFileParam);
- }
-
- private void addDBFileParam(boolean isOptional) {
- addDBFileParam(ParamNameEnum.DB_FILE, isOptional);
- }
-
- private void addDBFileParam(ParamNameEnum paramInfo, boolean isOptional) {
- FileParameter dbFileParam = new FileParameter(paramInfo);
- if (isOptional) {
- dbFileParam.setAsOptional();
- }
- dbFileParam.addFileFormat(DBFileFormat.FASTA);
- dbFileParam.fileMustExist();
- dbFileParam.mustBeAFile();
- addParameter(dbFileParam);
- }
-
- private void addDecoyPrefixParam() {
- addDecoyPrefixParam(MSGFPlus.DEFAULT_DECOY_PROTEIN_PREFIX);
- }
-
- private void addDecoyPrefixParam(String defaultDecoyPrefix) {
- StringParameter decoyPrefixParam = new StringParameter(ParamNameEnum.DECOY_PREFIX);
- // Note that defining a default value auto-sets isOptional to True
- decoyPrefixParam.defaultValue(defaultDecoyPrefix);
- addParameter(decoyPrefixParam);
- }
-
- private void addPrecursorMassToleranceParam() {
- ToleranceParameter pmTolParam = new ToleranceParameter(ParamNameEnum.PRECURSOR_MASS_TOLERANCE);
- pmTolParam.defaultValue("20ppm");
- addParameter(pmTolParam);
- }
-
- /**
- * -o for MS-GF+. Accepts only .pin (default) and .tsv after mzid removal.
- */
- private void addMzIdOutputFileParam() {
- FileParameter outputParam = new FileParameter(ParamNameEnum.SEARCH_OUTPUT_FILE);
- outputParam.addFileFormat(new FileFormat(".pin"));
- outputParam.addFileFormat(new FileFormat(".tsv"));
- outputParam.setAsOptional();
- addParameter(outputParam);
- }
-
- /**
- * Used by both MS-GF+ and MS-GFDB
- * MS-GF+ passes True for doNotAddMergeMode, thus ignoring ActivationMethod.FUSION
- *
- * @param defaultMethod
- * @param doNotAddMergeMode
- */
- private void addFragMethodParam(ActivationMethod defaultMethod, boolean doNotAddMergeMode) {
- ObjectEnumParameter fragParam = new ObjectEnumParameter<>(ParamNameEnum.FRAG_METHOD);
- ActivationMethod[] methods = ActivationMethod.getAllRegisteredActivationMethods();
- for (ActivationMethod m : methods) {
- if (doNotAddMergeMode && m == ActivationMethod.FUSION)
- continue;
- fragParam.registerObject(m);
- if (m == defaultMethod)
- fragParam.setDefault();
- }
- addParameter(fragParam);
- }
-
- private void addInstTypeParam() {
- addInstTypeParam(InstrumentType.LOW_RESOLUTION_LTQ);
- }
-
- private void addInstTypeParam(InstrumentType defaultInst) {
- ObjectEnumParameter instParam = new ObjectEnumParameter(ParamNameEnum.INSTRUMENT_TYPE);
- InstrumentType[] allInstTypes = InstrumentType.getAllRegisteredInstrumentTypes();
- for (InstrumentType inst : allInstTypes) {
- instParam.registerObject(inst);
- if (inst == defaultInst)
- instParam.setDefault();
- }
- addParameter(instParam);
- }
-
- private void addEnzymeParam() {
- addEnzymeParam(Enzyme.TRYPSIN);
- }
-
- private void addEnzymeParam(Enzyme enzymeId) {
- ObjectEnumParameter enzParam = new ObjectEnumParameter<>(ParamNameEnum.ENZYME_ID);
- Enzyme[] allEnzymes = Enzyme.getAllRegisteredEnzymes();
- for (Enzyme e : allEnzymes) {
- enzParam.registerObject(e);
- if (e == enzymeId)
- enzParam.setDefault();
- }
- addParameter(enzParam);
- }
-
- private void addProtocolParam() {
- addProtocolParam(Protocol.AUTOMATIC);
- }
-
- private void addProtocolParam(Protocol defaultProtocol) {
- ObjectEnumParameter protocolParam = new ObjectEnumParameter(ParamNameEnum.PROTOCOL_ID);
- Protocol[] protocols = Protocol.getAllRegisteredProtocols();
- for (Protocol protocol : protocols) {
- protocolParam.registerObject(protocol);
- if (protocol == defaultProtocol)
- protocolParam.setDefault();
- }
- addParameter(protocolParam);
- }
-
- private void addEnzymeSpecificityParam() {
- EnumParameter nttParam = new EnumParameter(ParamNameEnum.ENZYME_SPECIFICITY);
- nttParam.registerEntry("");
- nttParam.registerEntry("");
- nttParam.registerEntry("").setDefault();
- addParameter(nttParam);
- }
-
- private void addModFileParam() {
- FileParameter modParam = new FileParameter(ParamNameEnum.MOD_FILE);
- modParam.setAsOptional();
- modParam.fileMustExist();
- addParameter(modParam);
- }
-
- private void addConfigFileParam() {
- FileParameter configFile = new FileParameter(ParamNameEnum.CONFIGURATION_FILE);
- configFile.setAsOptional();
- configFile.fileMustExist();
- addParameter(configFile);
- }
-
- private void addIsotopeRangeParam() {
- IntRangeParameter isotopeRange = new IntRangeParameter(ParamNameEnum.ISOTOPE_ERROR);
- isotopeRange.setMaxInclusive();
- isotopeRange.defaultValue("0,1");
- addParameter(isotopeRange);
- }
-
- private IntParameter addNumThreadsParam() {
- IntParameter numThreadsParam = new IntParameter(ParamNameEnum.NUM_THREADS);
- numThreadsParam.defaultValue(Runtime.getRuntime().availableProcessors());
- numThreadsParam.minValue(1);
- addParameter(numThreadsParam);
- return numThreadsParam;
- }
-
- private void addVerboseModeParam() {
- EnumParameter verboseOutputParam = new EnumParameter(ParamNameEnum.VERBOSE);
- verboseOutputParam.registerEntry("Report total progress only").setDefault();
- verboseOutputParam.registerEntry("Report total and per-thread progress/status");
- addParameter(verboseOutputParam);
- }
-
- private void addNumTasksParam() {
- IntParameter numTasksParam = new IntParameter(ParamNameEnum.NUM_TASKS);
- numTasksParam.defaultValue(0);
- numTasksParam.minValue(-10);
- addParameter(numTasksParam);
- }
-
- private void addMinSpectraPerThreadParam() {
- IntParameter minSpectraParam = new IntParameter(ParamNameEnum.MIN_SPECTRA_PER_THREAD);
- minSpectraParam.defaultValue(250);
- minSpectraParam.minValue(1);
- addParameter(minSpectraParam);
- }
-
- private void addTdaParam() {
- EnumParameter tdaParam = new EnumParameter(ParamNameEnum.TDA_STRATEGY);
- tdaParam.registerEntry("Don't search decoy database").setDefault();
- tdaParam.registerEntry("Search decoy database");
- addParameter(tdaParam);
- }
-
- private void addMinPeptideLengthParam() {
- IntParameter minLenParam = new IntParameter(ParamNameEnum.MIN_PEPTIDE_LENGTH);
- minLenParam.minValue(1);
- minLenParam.defaultValue(6);
- addParameter(minLenParam);
- }
-
- private void addMaxPeptideLengthParam() {
- IntParameter maxLenParam = new IntParameter(ParamNameEnum.MAX_PEPTIDE_LENGTH);
- maxLenParam.minValue(1);
- maxLenParam.defaultValue(40);
- addParameter(maxLenParam);
- }
-
- private void addMinChargeParam() {
- IntParameter minCharge = new IntParameter(ParamNameEnum.MIN_CHARGE);
- minCharge.minValue(1);
- minCharge.defaultValue(2);
- addParameter(minCharge);
- }
-
- private void addMaxChargeParam() {
- IntParameter maxCharge = new IntParameter(ParamNameEnum.MAX_CHARGE);
- maxCharge.minValue(1);
- maxCharge.defaultValue(3);
- addParameter(maxCharge);
- }
-
- private void addNumMatchesPerSpecParam() {
- IntParameter numMatchesParam = new IntParameter(ParamNameEnum.NUM_MATCHES_SPEC);
- numMatchesParam.minValue(1);
- numMatchesParam.defaultValue(1);
- addParameter(numMatchesParam);
- }
-
- private void addAddFeaturesParam() {
- EnumParameter addFeatureParam = new EnumParameter(ParamNameEnum.ADD_FEATURES);
- addFeatureParam.registerEntry("Output basic scores only").setDefault();
- addFeatureParam.registerEntry("Output additional features");
- addParameter(addFeatureParam);
- }
-
- private void addOutputFormatParam() {
- // mzid output has been removed — MS-GF+ results feed into Percolator
- // via the .pin format. Only pin (default) and tsv are supported now.
- // Previous integer mappings 0=mzid, 2=both are no longer accepted.
- EnumParameter outputFormatParam = new EnumParameter(ParamNameEnum.OUTPUT_FORMAT);
- outputFormatParam.registerEntry("pin").setDefault();
- outputFormatParam.registerEntry("tsv");
- addParameter(outputFormatParam);
- }
-
- public int getOutputFormat() {
- return ((EnumParameter) getParameter(ParamNameEnum.OUTPUT_FORMAT.key)).getValue();
- }
-
- private void addPrecursorCalParam() {
- StringParameter precursorCalParam = new StringParameter(ParamNameEnum.PRECURSOR_CAL);
- precursorCalParam.defaultValue("auto");
- addParameter(precursorCalParam);
- }
-
- /**
- * Returns the raw value of the {@code -precursorCal} flag; one of
- * {@code "auto"}, {@code "on"}, or {@code "off"} (case-insensitive).
- * Use {@link SearchParams#getPrecursorCalMode()} for the parsed enum.
- */
- public String getPrecursorCalRawValue() {
- StringParameter param = (StringParameter) getParameter(ParamNameEnum.PRECURSOR_CAL.key);
- return param == null ? "auto" : param.value;
- }
-
- private void addChargeCarrierMassParam() {
- DoubleParameter chargeCarrierMassParam = new DoubleParameter(ParamNameEnum.CHARGE_CARRIER_MASSES);
- chargeCarrierMassParam.minValue(0.1);
- chargeCarrierMassParam.setMaxInclusive();
- chargeCarrierMassParam.defaultValue(Composition.PROTON);
- addParameter(chargeCarrierMassParam);
- }
-
- private void addMaxMissedCleavagesParam() {
- IntParameter maxMissedCleavages = new IntParameter(ParamNameEnum.MAX_MISSED_CLEAVAGES);
- maxMissedCleavages.minValue(-1);
- maxMissedCleavages.defaultValue(-1);
- addParameter(maxMissedCleavages);
- }
-
- private void addMaxNumModsParam() {
- IntParameter maxNumMods = new IntParameter(ParamNameEnum.MAX_NUM_MODS);
- maxNumMods.minValue(0);
- maxNumMods.defaultValue(3);
- addParameter(maxNumMods);
- }
-
- private void addAllowDenseCentroidedPeaksParam() {
- EnumParameter allowDenseCentroidedPeaksParam = new EnumParameter(ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS);
- allowDenseCentroidedPeaksParam.registerEntry("Skip all spectra that fail a peak density check").setDefault();
- allowDenseCentroidedPeaksParam.registerEntry("Allow mzML/mzXML centroided spectra that fail a peak density check");
- addParameter(allowDenseCentroidedPeaksParam);
- }
-
- private void addDbIndexDirParam(boolean isHidden) {
- FileParameter dbIndexDirParam = new FileParameter(ParamNameEnum.DD_DIRECTORY);
- dbIndexDirParam.fileMustExist();
- dbIndexDirParam.mustBeADirectory();
- dbIndexDirParam.setAsOptional();
- if (isHidden) {
- dbIndexDirParam.setHidden();
- }
- addParameter(dbIndexDirParam);
- }
-
- private void addPrecursorMassToleranceUnitsParam(boolean isHidden) {
- EnumParameter unitParam = new EnumParameter(ParamNameEnum.PRECURSOR_MASS_TOLERANCE_UNITS);
- unitParam.registerEntry("Da");
- unitParam.registerEntry("ppm");
- unitParam.registerEntry("Don't care").setDefault();
- if (isHidden) {
- unitParam.setHidden();
- }
- addParameter(unitParam);
- }
-
- private void addSpecIndexRangeParam(boolean isHidden) {
- IntRangeParameter specIndexParam = new IntRangeParameter(ParamNameEnum.SPEC_INDEX);
- specIndexParam.minValue(1);
- specIndexParam.setMaxInclusive();
- specIndexParam.defaultValue("1," + (Integer.MAX_VALUE - 1));
- if (isHidden) {
- specIndexParam.setHidden();
- }
- addParameter(specIndexParam);
- }
-
- private void addMSLevelParam() {
- IntRangeParameter msLevelParam = new IntRangeParameter(ParamNameEnum.MS_LEVEL);
- msLevelParam.minValue(1);
- msLevelParam.setMaxInclusive();
- msLevelParam.defaultValue("2,2");
- addParameter(msLevelParam);
- }
-
- private void addEdgeScoreParam(boolean isHidden) {
- EnumParameter edgeScoreParam = new EnumParameter(ParamNameEnum.EDGE_SCORE.key);
- edgeScoreParam.registerEntry("Use edge scoring").setDefault();
- edgeScoreParam.registerEntry("Do not use edge scoring");
- if (isHidden) {
- edgeScoreParam.setHidden();
- }
- addParameter(edgeScoreParam);
- }
-
- private void addMinNumPeaksParam(boolean isHidden) {
- IntParameter minNumPeaksParam = new IntParameter(ParamNameEnum.MIN_NUM_PEAKS);
- minNumPeaksParam.defaultValue(Constants.MIN_NUM_PEAKS_PER_SPECTRUM);
- if (isHidden) {
- minNumPeaksParam.setHidden();
- }
- addParameter(minNumPeaksParam);
- }
-
- private void addNumIsoformsParam(boolean isHidden) {
- IntParameter isoParam = new IntParameter(ParamNameEnum.NUM_ISOFORMS);
- isoParam.defaultValue(Constants.NUM_VARIANTS_PER_PEPTIDE);
- if (isHidden) {
- isoParam.setHidden();
- }
- addParameter(isoParam);
- }
-
- private void addMetCleavageParamParam(boolean isHidden) {
- EnumParameter metCleavageParam = new EnumParameter(ParamNameEnum.IGNORE_MET_CLEAVAGE);
- metCleavageParam.registerEntry("Consider protein N-term Met cleavage").setDefault();
- metCleavageParam.registerEntry("Ignore protein N-term Met cleavage");
- if (isHidden) {
- metCleavageParam.setHidden();
- }
- addParameter(metCleavageParam);
- }
-
- private void addMinDeNovoScoreParam(boolean isHidden) {
- IntParameter minDeNovoScoreParam = new IntParameter(ParamNameEnum.MIN_DE_NOVO_SCORE);
- minDeNovoScoreParam.minValue(Integer.MIN_VALUE);
- minDeNovoScoreParam.defaultValue(Constants.MIN_DE_NOVO_SCORE);
- if (isHidden) {
- minDeNovoScoreParam.setHidden();
- }
- addParameter(minDeNovoScoreParam);
- }
-
- /**
- * Add parameters for MS-GF+
- */
- public void addMSGFPlusParams() {
-
- // -conf ConfigurationFileName
- addConfigFileParam();
-
- // -s SpectrumFile (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
- addSpecFileParam(true);
-
- // -d DatabaseFile (*.fasta or *.fa or *.faa)
- addDBFileParam(true);
- addDecoyPrefixParam();
-
- // [-o OutputFile (*.pin or *.tsv)] (Default: [SpectrumFileName].pin)
- addMzIdOutputFileParam();
-
- addPrecursorMassToleranceParam();
- addPrecursorMassToleranceUnitsParam(true);
-
- addIsotopeRangeParam();
-
- addNumThreadsParam();
- addNumTasksParam();
- addMinSpectraPerThreadParam();
- addVerboseModeParam();
-
- addTdaParam();
-
- addFragMethodParam(ActivationMethod.ASWRITTEN, true);
- addInstTypeParam();
- addEnzymeParam();
- addProtocolParam();
- addEnzymeSpecificityParam();
-
- addModFileParam();
-
- addMinPeptideLengthParam();
- addMaxPeptideLengthParam();
- addMinChargeParam();
- addMaxChargeParam();
-
- addNumMatchesPerSpecParam();
- addAddFeaturesParam();
- addOutputFormatParam();
- addPrecursorCalParam();
- addChargeCarrierMassParam();
- addMaxMissedCleavagesParam();
- addMaxNumModsParam();
-
- addAllowDenseCentroidedPeaksParam();
- addMSLevelParam();
-
- addExample("Example (high-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 1 -t 20ppm -ti -1,2 -ntt 2 -tda 1 -o testMSGFPlus.pin -mod Mods.txt");
- addExample("Example (low-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 0 -t 0.5Da,2.5Da -ntt 2 -tda 1 -o testMSGFPlus.pin -mod Mods.txt");
-
- // Hidden parameters
- addDbIndexDirParam(true);
- addSpecIndexRangeParam(true);
- addEdgeScoreParam(true);
- addMinNumPeaksParam(true);
- addNumIsoformsParam(true);
- addMetCleavageParamParam(true);
- addMinDeNovoScoreParam(true);
-
- } // MSGFPlusParams
-
- public FileParameter getSpecFileParam() {
- return ((FileParameter) getParameter(ParamNameEnum.SPECTRUM_FILE.key));
- }
-
- public FileParameter getDBFileParam() {
- return ((FileParameter) getParameter(ParamNameEnum.DB_FILE.key));
- }
-
- public String getDecoyProteinPrefix() {
- StringParameter decoyProteinPrefixParam = (StringParameter)getParameter(ParamNameEnum.DECOY_PREFIX.key);
- return (decoyProteinPrefixParam.value);
- }
-
- public double getChargeCarrierMass() {
- return getDoubleValue(ParamNameEnum.CHARGE_CARRIER_MASSES.key);
- }
-
- public ToleranceParameter getPrecursorMassToleranceParam() {
- return ((ToleranceParameter) getParameter(ParamNameEnum.PRECURSOR_MASS_TOLERANCE.key));
- }
-
- public int getToleranceUnit() {
- return getIntValue(ParamNameEnum.PRECURSOR_MASS_TOLERANCE_UNITS.key);
- }
-
- public IntRangeParameter getIsotopeRangeParameter() {
- return (IntRangeParameter) getParameter(ParamNameEnum.ISOTOPE_ERROR.key);
- }
-
- public FileParameter getOutputFileParam() {
- return ((FileParameter) getParameter(ParamNameEnum.SEARCH_OUTPUT_FILE.key));
- }
-
- public ActivationMethod getActivationMethod() {
- return (ActivationMethod) ((ObjectEnumParameter>) getParameter(ParamNameEnum.FRAG_METHOD.key)).getObject();
- }
-
- public InstrumentType getInstType() {
- return (InstrumentType) ((ObjectEnumParameter>) getParameter(ParamNameEnum.INSTRUMENT_TYPE.key)).getObject();
- }
-
- public Enzyme getEnzyme() {
- return (Enzyme) ((ObjectEnumParameter>) getParameter(ParamNameEnum.ENZYME_ID.key)).getObject();
- }
-
- public int getNumTolerableTermini() {
- return getIntValue(ParamNameEnum.ENZYME_SPECIFICITY.key);
- }
-
- public int getNumMatchesPerSpectrum() {
- return getIntValue(ParamNameEnum.NUM_MATCHES_SPEC.key);
- }
-
- public IntRangeParameter getSpecIndexParameter() {
- return ((IntRangeParameter) getParameter(ParamNameEnum.SPEC_INDEX.key));
- }
-
- public IntRangeParameter getMSLevelParameter() {
- return ((IntRangeParameter) getParameter(ParamNameEnum.MS_LEVEL.key));
- }
-
- public int getTDA() {
- return getIntValue(ParamNameEnum.TDA_STRATEGY.key);
- }
-
- public int getIgnoreMetCleavage() {
- return getIntValue(ParamNameEnum.IGNORE_MET_CLEAVAGE.key);
- }
-
- public int getOutputAdditionalFeatures() {
- return getIntValue(ParamNameEnum.ADD_FEATURES.key);
- }
-
- public int getMinPeptideLength() {
- return getIntValue(ParamNameEnum.MIN_PEPTIDE_LENGTH.key);
- }
-
- public int getMaxPeptideLength() {
- return getIntValue(ParamNameEnum.MAX_PEPTIDE_LENGTH.key);
- }
-
- public int getMaxNumVariantsPerPeptide() {
- return getIntValue(ParamNameEnum.NUM_ISOFORMS.key);
- }
-
- public int getMinCharge() {
- return getIntValue(ParamNameEnum.MIN_CHARGE.key);
- }
-
- public int getMaxCharge() {
- return getIntValue(ParamNameEnum.MAX_CHARGE.key);
- }
-
- public int getNumThreads() {
- return getIntValue(ParamNameEnum.NUM_THREADS.key);
- }
-
- public int getNumTasks() {
- return getIntValue(ParamNameEnum.NUM_TASKS.key);
- }
-
- public int getMinSpectraPerThread() {
- return getIntValue(ParamNameEnum.MIN_SPECTRA_PER_THREAD.key);
- }
-
- public int getVerboseFlag() {
- return getIntValue(ParamNameEnum.VERBOSE.key);
- }
-
- public int getEdgeScoreFlag() {
- return getIntValue(ParamNameEnum.EDGE_SCORE.key);
- }
-
- // Used by MS-GF+
- public File getDatabaseIndexDir() {
- return getFile("dd");
- }
-
- public int getMinNumPeaksPerSpectrum() {
- return getIntValue(ParamNameEnum.MIN_NUM_PEAKS.key);
- }
-
- public int getMinDeNovoScore() {
- return getIntValue(ParamNameEnum.MIN_DE_NOVO_SCORE.key);
- }
-
- public int getMaxMissedCleavages() {
- return getIntValue(ParamNameEnum.MAX_MISSED_CLEAVAGES.key);
- }
-
- public int getMaxNumModsPerPeptide() {
- Parameter param = this.getParameter(ParamNameEnum.MAX_NUM_MODS.key);
- if (param == null) {
- this.addMaxNumModsParam();
- }
- return getIntValue(ParamNameEnum.MAX_NUM_MODS.key);
- }
-
- public Protocol getProtocol() {
- return (Protocol) ((ObjectEnumParameter>) getParameter(ParamNameEnum.PROTOCOL_ID.key)).getObject();
- }
-
- public FileParameter getModFileParam() {
- return ((FileParameter) getParameter(ParamNameEnum.MOD_FILE.key));
- }
-
- // Used by MS-GF+
- public FileParameter getConfigFileParam() {
- return ((FileParameter) getParameter(ParamNameEnum.CONFIGURATION_FILE.key));
- }
-
- // Used by MS-GF+
- public int getAllowDenseCentroidedPeaks() {
- return getIntValue(ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS.key);
- }
-
- public int getIntValue(String key) {
- Parameter param = this.getParameter(key);
- if (param instanceof IntParameter)
- return ((IntParameter) param).getValue();
- else {
- System.err.println("[Error] in ParamManager.getIntValue: " + key + " is not an instance of IntParameter.");
- System.exit(-1);
- }
- return -1;
- }
-
- public float getFloatValue(String key) {
- Parameter param = this.getParameter(key);
- if (param instanceof FloatParameter)
- return ((FloatParameter) param).getValue();
- else {
- System.err.println("[Error] in ParamManager.getFloatValue: " + key + " is not an instance of FloatParameter.");
- System.exit(-1);
- }
- return -1;
- }
-
- public double getDoubleValue(String key) {
- Parameter param = this.getParameter(key);
- if (param instanceof DoubleParameter)
- return ((DoubleParameter) param).getValue();
- else {
- System.err.println("[Error] in ParamManager.getDoubleValue: " + key + " is not an instance of DoubleParameter.");
- System.exit(-1);
- }
- return -1;
- }
-
- public File getFile(String key) {
- Parameter param = this.getParameter(key);
- if (param instanceof FileParameter)
- return ((FileParameter) param).getFile();
- else {
- System.err.println("[Error] in ParamManager.getFile: " + key + " is not an instance of FileParameter.");
- System.exit(-1);
- }
- return null;
- }
-
- public File[] getFiles(String key) {
- Parameter param = this.getParameter(key);
- if (param instanceof FileListParameter)
- return ((FileListParameter) param).getFiles();
- else {
- System.err.println("[Error] in ParamManager.getFile: " + key + " is not an instance of FileListParameter.");
- System.exit(-1);
- }
- return null;
- }
-
- public void setMaxNumMods(int numMods) {
- Parameter numModsParam = getParameter(ParamManager.ParamNameEnum.MAX_NUM_MODS.getKey());
- numModsParam.parse(String.valueOf(numMods));
- }
-
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/ParamParser.java b/src/main/java/edu/ucsd/msjava/params/ParamParser.java
deleted file mode 100644
index ab9000d1..00000000
--- a/src/main/java/edu/ucsd/msjava/params/ParamParser.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import edu.ucsd.msjava.parser.BufferedLineReader;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.HashMap;
-
-
-/**
- * This class is for parsing parameter files used in MS-GF, MS-Dictionary and MS-Profile.
- *
- * @author sangtaekim
- */
-public class ParamParser {
- public static class Parameters extends CaseInsensitiveMap {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public String getParameter(String name) {
- return get(name);
- }
-
- public Integer getIntParameter(String name) {
- String param = get(name);
- if (param == null)
- return null;
- else return Integer.parseInt(param);
- }
-
- public Float getFloatParameter(String name) {
- String param = get(name);
- if (param == null)
- return null;
- else return Float.parseFloat(param);
- }
- }
-
- /**
- * Parses the specified parameter file.
- *
- * @param fileName the name of the parameter file.
- * @return A table of parameters.
- */
- public static Parameters parseFromFile(String fileName) {
- Parameters params = new Parameters();
- BufferedLineReader in = null;
- try {
- in = new BufferedLineReader(fileName);
- } catch (IOException e) {
- e.printStackTrace();
- }
- String s;
- while ((s = in.readLine()) != null) {
- if (s.startsWith("#") || s.length() == 0)
- continue;
- String[] token = s.split("=");
- if (token.length != 2)
- continue;
- else
- params.put(token[0].trim(), token[1].trim());
- }
- return params;
- }
-
- public static Parameters parseFromString(String paramString) {
- String errMsg = "Number of parameters must be even. If a file path has a space, surround it with double quotes.";
-
- Parameters params = new Parameters();
- String[] token = paramString.split("\\s+");
- if (token.length % 2 != 0) {
- System.err.println(errMsg);
- System.exit(-1);
- }
-
- for (int i = 0; i < token.length; i += 2) {
- if (!token[i].startsWith("-") || i + 1 >= token.length) {
- System.err.println(errMsg);
- }
- params.put(token[i].trim(), token[i + 1].trim());
- }
- return params;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/Parameter.java b/src/main/java/edu/ucsd/msjava/params/Parameter.java
deleted file mode 100644
index 8609c0da..00000000
--- a/src/main/java/edu/ucsd/msjava/params/Parameter.java
+++ /dev/null
@@ -1,81 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public abstract class Parameter {
- private String key;
- private String name;
- private String description;
- private boolean isOptional = false;
- private boolean isValueAssigned = false;
- private String additionalDescription = null;
-
- private boolean hidden = false;
-
- protected Parameter(String key, String name, String description) {
- this.key = key;
- this.name = name;
- this.description = description;
- }
-
- protected void setOptional() {
- this.isOptional = true;
- }
-
- public void setHidden() {
- this.hidden = true;
- }
-
- public String getKey() {
- return key;
- }
-
- public String getName() {
- return name;
- }
-
- public String getDescription() {
- return description;
- }
-
- public String getAdditionalDescription() {
- return additionalDescription;
- }
-
- public boolean isOptional() {
- return isOptional;
- }
-
- public boolean isHidden() {
- return hidden;
- }
-
- public void setAdditionalDescription(String additionalDescription) {
- this.additionalDescription = additionalDescription;
- }
-
- public String toString() {
- String usage = "-" + getKey() + " " + getName();
- if (isOptional())
- usage = "[" + usage + "]";
- usage = usage + " " + "(" + getDescription() + ")";
- return usage;
- }
-
- public void setValueAssigned() {
- this.isValueAssigned = true;
- }
-
- public boolean isValueAssigned() {
- return isValueAssigned;
- }
-
- public boolean isValid() {
- if (isOptional)
- return true;
-
- return isValueAssigned();
- }
-
- public abstract String parse(String value);
-
- public abstract String getValueAsString();
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/RangeParameter.java b/src/main/java/edu/ucsd/msjava/params/RangeParameter.java
deleted file mode 100644
index 236924c1..00000000
--- a/src/main/java/edu/ucsd/msjava/params/RangeParameter.java
+++ /dev/null
@@ -1,66 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public abstract class RangeParameter> extends Parameter {
- protected T min = null;
- protected T max = null;
- protected T minValue; // default: inclusive
- protected T maxValue; // default: exclusive
- protected boolean isMinInclusive = true;
- protected boolean isMaxInclusive = false;
-
- public RangeParameter(String key, String name, String description) {
- super(key, name, description);
- }
-
- public RangeParameter minValue(T minValue) {
- this.minValue = minValue;
- return this;
- }
-
- public RangeParameter maxValue(T maxValue) {
- this.maxValue = maxValue;
- return this;
- }
-
- public RangeParameter setMinExclusive() {
- this.isMinInclusive = false;
- return this;
- }
-
- public RangeParameter setMaxInclusive() {
- this.isMaxInclusive = true;
- return this;
- }
-
- public boolean isValueValid(T value) {
- return !(value.compareTo(minValue) < 0 || value.compareTo(maxValue) > 0
- || !isMinInclusive && value.equals(minValue)
- || !isMaxInclusive && value.equals(maxValue));
- }
-
- public RangeParameter defaultValue(String value) {
- super.setOptional();
- String error = parse(value);
- if (error != null) {
- System.err.println("(RangeParameter) Error while parsing the default value: " + error);
- System.exit(-1);
- }
- return this;
- }
-
- public abstract String parse(String value);
-
-
- @Override
- public String getValueAsString() {
- return min + "," + max;
- }
-
- public T getMin() {
- return min;
- }
-
- public T getMax() {
- return max;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/StringParameter.java b/src/main/java/edu/ucsd/msjava/params/StringParameter.java
deleted file mode 100644
index 4783bcc6..00000000
--- a/src/main/java/edu/ucsd/msjava/params/StringParameter.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package edu.ucsd.msjava.params;
-
-public class StringParameter extends Parameter {
- String value = null;
-
- public StringParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo.getKey(), paramInfo.getName(), paramInfo.getDescription());
- setAdditionalDescription(paramInfo.getAdditionalDescription());
- }
-
- public StringParameter(String key, String name, String description) {
- super(key, name, description);
- }
-
- public StringParameter defaultValue(String defaultValue) {
- this.value = defaultValue;
- super.setOptional();
- return this;
- }
-
- @Override
- public String parse(String value) {
- this.value = value.trim();
- return null;
- }
-
- @Override
- public String getValueAsString() {
- return (value == null ? "null" : value);
- }
-
-}
diff --git a/src/main/java/edu/ucsd/msjava/params/ToleranceParameter.java b/src/main/java/edu/ucsd/msjava/params/ToleranceParameter.java
deleted file mode 100644
index 05c47244..00000000
--- a/src/main/java/edu/ucsd/msjava/params/ToleranceParameter.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package edu.ucsd.msjava.params;
-
-import edu.ucsd.msjava.msgf.Tolerance;
-
-public class ToleranceParameter extends Parameter {
-
- private Tolerance leftTolerance;
- private Tolerance rightTolerance;
- private boolean allowAsymmetricValues = true;
-
- public ToleranceParameter(ParamManager.ParamNameEnum paramInfo) {
- super(paramInfo.getKey(), paramInfo.getName(), paramInfo.getDescription());
- setAdditionalDescription(paramInfo.getAdditionalDescription());
- }
-
-
- public ToleranceParameter defaultValue(String value) {
- super.setOptional();
- String error = parse(value);
- if (error != null) {
- System.err.println("(ToleranceParameter) Error while setting default value: " + error);
- System.exit(-1);
- }
- return this;
- }
-
- public ToleranceParameter doNotAllowAsymmetricValues() {
- this.allowAsymmetricValues = false;
- return this;
- }
-
- @Override
- public String parse(String value) {
- String[] token = value.split(",");
- if (token.length == 1) {
- leftTolerance = rightTolerance = Tolerance.parseToleranceStr(token[0]);
- } else if (token.length == 2) {
- if (allowAsymmetricValues) {
- leftTolerance = Tolerance.parseToleranceStr(token[0]);
- rightTolerance = Tolerance.parseToleranceStr(token[1]);
- } else
- return "asymmetric values are not allowed";
- }
- if (leftTolerance == null || rightTolerance == null) {
- return "invalid tolerance value";
- }
- if (leftTolerance.isTolerancePPM() != rightTolerance.isTolerancePPM()) {
- return "left and right tolerance units must be the same";
- }
- if (leftTolerance.getValue() < 0 || rightTolerance.getValue() < 0) {
- return "parent mass tolerance must not be negative";
- }
- return null;
- }
-
- @Override
- public String getValueAsString() {
- if (leftTolerance == null || rightTolerance == null)
- return null;
- return leftTolerance.toString() + "," + rightTolerance.toString();
- }
-
- public Tolerance getLeftTolerance() {
- return leftTolerance;
- }
-
- public Tolerance getRightTolerance() {
- return rightTolerance;
- }
-}
diff --git a/src/test/java/msgfplus/TestMSLevelFiltering.java b/src/test/java/msgfplus/TestMSLevelFiltering.java
deleted file mode 100644
index 361c574d..00000000
--- a/src/test/java/msgfplus/TestMSLevelFiltering.java
+++ /dev/null
@@ -1,76 +0,0 @@
-package msgfplus;
-
-import static org.junit.Assert.*;
-
-import edu.ucsd.msjava.params.IntRangeParameter;
-import edu.ucsd.msjava.params.ParamManager;
-import org.junit.Test;
-
-/**
- * Tests for the -msLevel parameter (issue #159).
- * Verifies that MS level filtering is properly wired through ParamManager.
- */
-public class TestMSLevelFiltering {
-
- private ParamManager createParamManager() {
- ParamManager pm = new ParamManager("MS-GF+", "test", "2024.01.01", "test");
- pm.addMSGFPlusParams();
- return pm;
- }
-
- @Test
- public void testMSLevelParameterExists() {
- ParamManager pm = createParamManager();
- IntRangeParameter msLevel = pm.getMSLevelParameter();
- assertNotNull("MS_LEVEL parameter should exist", msLevel);
- }
-
- @Test
- public void testMSLevelDefaultIsMS2() {
- ParamManager pm = createParamManager();
- IntRangeParameter msLevel = pm.getMSLevelParameter();
- // Default should be MS2 only (2,2)
- assertEquals("Default min MS level should be 2", 2, (int) msLevel.getMin());
- assertEquals("Default max MS level should be 2", 2, (int) msLevel.getMax());
- }
-
- @Test
- public void testMSLevelParseSingleValue() {
- ParamManager pm = createParamManager();
- IntRangeParameter msLevel = pm.getMSLevelParameter();
- String err = msLevel.parse("2");
- assertNull("Parsing '2' should succeed", err);
- assertEquals(2, (int) msLevel.getMin());
- assertEquals(2, (int) msLevel.getMax());
- }
-
- @Test
- public void testMSLevelParseRange() {
- ParamManager pm = createParamManager();
- IntRangeParameter msLevel = pm.getMSLevelParameter();
- String err = msLevel.parse("2,3");
- assertNull("Parsing '2,3' should succeed", err);
- assertEquals(2, (int) msLevel.getMin());
- assertEquals(3, (int) msLevel.getMax());
- }
-
- @Test
- public void testMSLevelParseMS3Only() {
- ParamManager pm = createParamManager();
- IntRangeParameter msLevel = pm.getMSLevelParameter();
- String err = msLevel.parse("3");
- assertNull("Parsing '3' should succeed", err);
- assertEquals(3, (int) msLevel.getMin());
- assertEquals(3, (int) msLevel.getMax());
- }
-
- @Test
- public void testMSLevelParseWideRange() {
- ParamManager pm = createParamManager();
- IntRangeParameter msLevel = pm.getMSLevelParameter();
- String err = msLevel.parse("1,5");
- assertNull("Parsing '1,5' should succeed", err);
- assertEquals(1, (int) msLevel.getMin());
- assertEquals(5, (int) msLevel.getMax());
- }
-}
From 1c68fb2ffc9107679a9f5806e3d9e3017f090f8d Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 06:37:04 +0100
Subject: [PATCH 20/34] refactor: drop MS2/PKL/DTA_TXT spectrum format support
Remove legacy text-format parsers; only MGF and mzML are retained.
- Delete MS2SpectrumParser, PklSpectrumParser, PNNLSpectrumParser,
PNNLSpectraIterator, PNNLSpectraMap from parser/
- Delete dead-code SPTxtParser, SpectrumParserWithTitle,
TSVParser, TSVResultParser, FullyBufferedLineReader from parser/
- Delete SpectraMapByTitle and SpectrumAccessorByTitle from msutil/
(only callers were in the deleted SPTxtParser)
- Remove MS2/PKL/DTA_TXT/MZDATA entries from SpecFileFormat
- Prune corresponding branches from SpectraAccessor.getSpecMap()
and getSpecItr() and getSpectrumIDFormatCvParam()
- Prune Spectrum.getSpectrumFileFormat() to mzML + MGF only
- Update SearchParams.isSupportedSpectrumFormat() to mzML + MGF only
- Update MSGFPlusOptions -s description to list only *.mzML, *.mgf
- Remove @Ignore generateTRexPRMSpectra test (used deleted TSVParser)
---
.../edu/ucsd/msjava/cli/MSGFPlusOptions.java | 2 +-
.../ucsd/msjava/msdbsearch/SearchParams.java | 7 +-
.../ucsd/msjava/msutil/SpecFileFormat.java | 12 -
.../ucsd/msjava/msutil/SpectraAccessor.java | 50 +---
.../ucsd/msjava/msutil/SpectraMapByTitle.java | 26 --
.../java/edu/ucsd/msjava/msutil/Spectrum.java | 9 -
.../msutil/SpectrumAccessorByTitle.java | 5 -
.../parser/FullyBufferedLineReader.java | 81 ------
.../ucsd/msjava/parser/MS2SpectrumParser.java | 194 -------------
.../msjava/parser/PNNLSpectraIterator.java | 46 ----
.../ucsd/msjava/parser/PNNLSpectraMap.java | 47 ----
.../msjava/parser/PNNLSpectrumParser.java | 256 ------------------
.../ucsd/msjava/parser/PklSpectrumParser.java | 127 ---------
.../edu/ucsd/msjava/parser/SPTxtParser.java | 140 ----------
.../parser/SpectrumParserWithTitle.java | 8 -
.../edu/ucsd/msjava/parser/TSVParser.java | 44 ---
.../ucsd/msjava/parser/TSVResultParser.java | 133 ---------
src/test/java/msgfplus/TestMisc.java | 74 -----
18 files changed, 14 insertions(+), 1247 deletions(-)
delete mode 100644 src/main/java/edu/ucsd/msjava/msutil/SpectraMapByTitle.java
delete mode 100644 src/main/java/edu/ucsd/msjava/msutil/SpectrumAccessorByTitle.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/FullyBufferedLineReader.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/MS2SpectrumParser.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/PNNLSpectraIterator.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/PNNLSpectraMap.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/PNNLSpectrumParser.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/PklSpectrumParser.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/SPTxtParser.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/SpectrumParserWithTitle.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/TSVParser.java
delete mode 100644 src/main/java/edu/ucsd/msjava/parser/TSVResultParser.java
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
index 26f82988..e6416223 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
@@ -40,7 +40,7 @@ public final class MSGFPlusOptions {
// ---------- input (required at runtime, but may be provided via -conf) ----------
@Option(names = "-s", paramLabel = "SpectrumFile",
- description = "Input spectrum file (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl, *_dta.txt) or directory of spectra. "
+ description = "Input spectrum file (*.mzML, *.mgf) or directory of spectra. "
+ "Required, unless provided via -conf as SpectrumFile=...")
public File spectrumFile;
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
index a089d88c..2c66a799 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
@@ -500,13 +500,10 @@ public String parse(MSGFPlusOptions opts) {
return null;
}
- /** Spectrum-format whitelist (formerly enforced by FileParameter.isSupported). */
+ /** Spectrum-format whitelist: only mzML and MGF are supported. */
private static boolean isSupportedSpectrumFormat(SpecFileFormat fmt) {
return fmt == SpecFileFormat.MZML
- || fmt == SpecFileFormat.MGF
- || fmt == SpecFileFormat.MS2
- || fmt == SpecFileFormat.PKL
- || fmt == SpecFileFormat.DTA_TXT;
+ || fmt == SpecFileFormat.MGF;
}
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpecFileFormat.java b/src/main/java/edu/ucsd/msjava/msutil/SpecFileFormat.java
index 87b2d0c3..20ed52f1 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpecFileFormat.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpecFileFormat.java
@@ -23,10 +23,6 @@ public String getPSIName() {
public static final SpecFileFormat MGF;
public static final SpecFileFormat MZML;
- public static final SpecFileFormat MS2;
- public static final SpecFileFormat PKL;
- public static final SpecFileFormat MZDATA;
- public static final SpecFileFormat DTA_TXT;
public static SpecFileFormat getSpecFileFormat(String specFileName) {
String lowerCaseFileName = specFileName.toLowerCase();
@@ -44,17 +40,9 @@ public static SpecFileFormat getSpecFileFormat(String specFileName) {
static {
MGF = new SpecFileFormat(".mgf", "MS:1001062", "Mascot MGF file");
MZML = new SpecFileFormat(".mzML", "MS:1000584", "mzML file");
- MS2 = new SpecFileFormat(".ms2", "MS:1001466", "MS2 file");
- PKL = new SpecFileFormat(".pkl", "MS:1000565", "Micromass PKL file");
- MZDATA = new SpecFileFormat(".mzData", "MS:1000564", "PSI mzData file");
- DTA_TXT = new SpecFileFormat("_dta.txt", "MS:XXXXXXX", "PNNL dta.txt file");
specFileFormatList = new ArrayList();
specFileFormatList.add(MGF);
specFileFormatList.add(MZML);
- specFileFormatList.add(MS2);
- specFileFormatList.add(PKL);
- specFileFormatList.add(MZDATA);
- specFileFormatList.add(DTA_TXT);
}
}
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java b/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java
index 57523ec9..56b51ef3 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java
@@ -3,10 +3,10 @@
import edu.ucsd.msjava.mzml.StaxMzMLParser;
import edu.ucsd.msjava.mzml.StaxMzMLSpectraIterator;
import edu.ucsd.msjava.mzml.StaxMzMLSpectraMap;
-import edu.ucsd.msjava.parser.*;
+import edu.ucsd.msjava.parser.MgfSpectrumParser;
+import edu.ucsd.msjava.parser.SpectrumParser;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
@@ -71,21 +71,12 @@ public SpectrumAccessorBySpecIndex getSpecMap() {
}
}
specMap = new StaxMzMLSpectraMap(staxParser, minMSLevel, maxMSLevel);
- } else if (specFormat == SpecFileFormat.DTA_TXT)
- specMap = new PNNLSpectraMap(specFile.getPath());
- else {
- SpectrumParser parser = null;
- if (specFormat == SpecFileFormat.MGF)
- parser = new MgfSpectrumParser();
- else if (specFormat == SpecFileFormat.MS2)
- parser = new MS2SpectrumParser();
- else if (specFormat == SpecFileFormat.PKL)
- parser = new PklSpectrumParser();
- else
- return null;
-
+ } else if (specFormat == SpecFileFormat.MGF) {
+ SpectrumParser parser = new MgfSpectrumParser();
spectrumParser = parser;
specMap = new SpectraMap(specFile.getPath(), parser);
+ } else {
+ return null;
}
}
@@ -108,29 +99,16 @@ public Iterator getSpecItr() {
}
}
specItr = new StaxMzMLSpectraIterator(staxParser, minMSLevel, maxMSLevel);
- } else if (specFormat == SpecFileFormat.DTA_TXT)
- try {
- specItr = new PNNLSpectraIterator(specFile.getPath());
- } catch (IOException e) {
- e.printStackTrace();
- }
- else {
- SpectrumParser parser = null;
- if (specFormat == SpecFileFormat.MGF)
- parser = new MgfSpectrumParser();
- else if (specFormat == SpecFileFormat.MS2)
- parser = new MS2SpectrumParser();
- else if (specFormat == SpecFileFormat.PKL)
- parser = new PklSpectrumParser();
- else
- return null;
-
+ } else if (specFormat == SpecFileFormat.MGF) {
+ SpectrumParser parser = new MgfSpectrumParser();
spectrumParser = parser;
try {
specItr = new SpectraIterator(specFile.getPath(), parser);
} catch (IOException e) {
e.printStackTrace();
}
+ } else {
+ return null;
}
}
@@ -167,14 +145,8 @@ public String getTitle(int specIndex) {
public CvParamInfo getSpectrumIDFormatCvParam() {
CvParamInfo cvParam = null;
- if (specFormat == SpecFileFormat.DTA_TXT
- || specFormat == SpecFileFormat.MGF
- || specFormat == SpecFileFormat.PKL
- || specFormat == SpecFileFormat.MS2
- )
+ if (specFormat == SpecFileFormat.MGF)
cvParam = new CvParamInfo("MS:1000774", "multiple peak list nativeID format", null);
- else if (specFormat == SpecFileFormat.MZDATA)
- cvParam = new CvParamInfo("MS:1000777", "spectrum identifier nativeID format", null);
else if (specFormat == SpecFileFormat.MZML) {
if (staxParser == null) {
try {
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpectraMapByTitle.java b/src/main/java/edu/ucsd/msjava/msutil/SpectraMapByTitle.java
deleted file mode 100644
index 68db2eba..00000000
--- a/src/main/java/edu/ucsd/msjava/msutil/SpectraMapByTitle.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package edu.ucsd.msjava.msutil;
-
-import edu.ucsd.msjava.parser.SpectrumParserWithTitle;
-
-import java.util.Hashtable;
-
-
-public class SpectraMapByTitle extends SpectraMap implements SpectrumAccessorByTitle {
-
- private Hashtable titleToSpecIndex = null; // key: specIndex, value: filePos
-
- public SpectraMapByTitle(String fileName, SpectrumParserWithTitle parser) {
- super(fileName, parser);
- lineReader.seek(0);
- titleToSpecIndex = parser.getTitleToSpecIndexMap(super.lineReader);
- }
-
- public Spectrum getSpectrumByTitle(String title) {
- Integer specIndex = titleToSpecIndex.get(title);
- if (specIndex == null)
- return null;
- else
- return super.getSpectrumBySpecIndex(specIndex);
- }
-
-}
diff --git a/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java b/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java
index 1e6e27f6..473f622e 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java
@@ -1029,15 +1029,6 @@ public static SpecFileFormat getSpectrumFileFormat(String specFileName) {
specFormat = SpecFileFormat.MZML;
else if (extension.equalsIgnoreCase(".mgf"))
specFormat = SpecFileFormat.MGF;
- else if (extension.equalsIgnoreCase(".ms2"))
- specFormat = SpecFileFormat.MS2;
- else if (extension.equalsIgnoreCase(".pkl"))
- specFormat = SpecFileFormat.PKL;
- }
- if (specFormat == null && specFileName.length() > 8) {
- String suffix = specFileName.substring(specFileName.length() - 8);
- if (suffix.equalsIgnoreCase("_dta.txt"))
- specFormat = SpecFileFormat.DTA_TXT;
}
return specFormat;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpectrumAccessorByTitle.java b/src/main/java/edu/ucsd/msjava/msutil/SpectrumAccessorByTitle.java
deleted file mode 100644
index 933fbef7..00000000
--- a/src/main/java/edu/ucsd/msjava/msutil/SpectrumAccessorByTitle.java
+++ /dev/null
@@ -1,5 +0,0 @@
-package edu.ucsd.msjava.msutil;
-
-public interface SpectrumAccessorByTitle {
- Spectrum getSpectrumByTitle(String title);
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/FullyBufferedLineReader.java b/src/main/java/edu/ucsd/msjava/parser/FullyBufferedLineReader.java
deleted file mode 100644
index 0dd8b88a..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/FullyBufferedLineReader.java
+++ /dev/null
@@ -1,81 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-public class FullyBufferedLineReader implements LineReader {
- private int pointer;
- private byte[] buffer;
-
- private final byte CR = (byte) '\r';
- private final byte NL = (byte) '\n';
- int startIndex;
-
- public FullyBufferedLineReader(String fileName) {
- FileInputStream fin = null;
- try {
- fin = new FileInputStream(fileName);
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- }
-
- // load file into memory
- FileChannel in = fin.getChannel();
- ByteBuffer tempBuffer = null;
- try {
-// System.out.println(Integer.MAX_VALUE + "\t" + in.size() + "\t" + (int)in.size());
- tempBuffer = ByteBuffer.allocate((int) in.size()); // file size must be smaller than 2^32
- in.read(tempBuffer);
- } catch (IOException e1) {
- e1.printStackTrace();
- }
-
- buffer = tempBuffer.array();
- pointer = 0;
- startIndex = 0;
- }
-
- public String readLine() // line terminating char: \n or \r\n
- {
- if (pointer >= buffer.length)
- return null;
- while (pointer < buffer.length) {
- if (buffer[pointer] != NL)
- pointer++;
- else {
- String str;
- if (pointer > 0 && buffer[pointer - 1] == CR)
- str = new String(buffer, startIndex, (pointer - startIndex - 1));
- else
- str = new String(buffer, startIndex, (pointer - startIndex));
- pointer++;
- startIndex = pointer;
- return str;
- }
- }
- String str = new String(buffer, startIndex, (pointer - startIndex));
- startIndex = pointer;
- return str;
- }
-
- public int getPosition() {
- return pointer;
- }
-
- public void seek(int position) {
- pointer = position;
- startIndex = pointer;
- }
-
- public void reset() {
- pointer = 0;
- startIndex = 0;
- }
-
- public int size() {
- return buffer.length;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/MS2SpectrumParser.java b/src/main/java/edu/ucsd/msjava/parser/MS2SpectrumParser.java
deleted file mode 100644
index ca1abf80..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/MS2SpectrumParser.java
+++ /dev/null
@@ -1,194 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import edu.ucsd.msjava.msutil.*;
-
-import java.util.Collections;
-import java.util.Hashtable;
-import java.util.Map;
-
-/**
- * A class that parses MS2 format
- *
- * @author sangtaekim
- */
-public class MS2SpectrumParser implements SpectrumParser {
-
- private Spectrum spec = null;
- private Boolean isSpecSorted = null;
-
- /**
- * Number of scans where we could not determine the scan number
- * This method is required by interface SpectrumParser
- * However, this class does not keep track of spectra without a scan number
- *
- * @return
- */
- public long getScanMissingWarningCount() {
- return 0;
- }
-
- /**
- * Reads a spectrum from ms2 file and returns it.
- *
- * @param lineReader A LineReader object points to the start of a spectrum.
- * @return a spectrum object.
- */
- public Spectrum readSpectrum(LineReader lineReader) {
- float prevMass = 0;
- String buf;
-
- do {
- buf = lineReader.readLine();
- }
- while (buf != null && buf.startsWith("H"));
- if (buf == null)
- return null;
-
- if (buf.startsWith("S")) {
- String[] token = buf.split("\\s+");
- spec = new Spectrum();
- int startScanNum = Integer.parseInt(token[1]);
- int endScanNum = Integer.parseInt(token[2]);
- float precursorMz = Float.parseFloat(token[3]);
- spec = new Spectrum(precursorMz, 0, 0);
- spec.setStartScanNum(startScanNum);
- spec.setEndScanNum(endScanNum);
- isSpecSorted = true;
- } else if (spec == null) {
- return null;
- }
-
- boolean zParsed = false;
- while ((buf = lineReader.readLine()) != null) {
- String[] token = buf.split("\\s+");
- if (buf.startsWith("H"))
- continue;
- else if (buf.startsWith("S")) // start of a next spectrum
- {
- Spectrum specCopy = spec;
- Boolean isSpecSortedCopy = isSpecSorted;
-
- spec = new Spectrum();
- int startScanNum = Integer.parseInt(token[1]);
- int endScanNum = Integer.parseInt(token[2]);
- float precursorMz = Float.parseFloat(token[3]);
- spec = new Spectrum(precursorMz, 0, 0);
- spec.setStartScanNum(startScanNum);
- spec.setEndScanNum(endScanNum);
- isSpecSorted = true;
-
- if (!isSpecSortedCopy)
- Collections.sort(specCopy);
- return specCopy;
- } else if (buf.startsWith("Z")) {
- if (!zParsed) {
- int charge = Integer.parseInt(token[1]);
- float precursorMH = Float.parseFloat(token[2]);
- float precursorMz = ((precursorMH - (float) Composition.ChargeCarrierMass()) + charge * (float) Composition.ChargeCarrierMass()) / charge;
- spec.setPrecursor(new Peak(precursorMz, 0, charge));
- zParsed = true;
- } else {
- spec.setPrecursorCharge(0);
- }
- } else if (token.length == 2) // a peak
- {
- assert (spec != null);
- float mass = Float.parseFloat(token[0]);
- if (isSpecSorted && mass < prevMass)
- isSpecSorted = false;
- else
- prevMass = mass;
- float intensity = Float.parseFloat(token[1]);
- spec.add(new Peak(mass, intensity, 1));
- }
- }
-
- if (spec != null) {
- if (!isSpecSorted)
- Collections.sort(spec);
- Spectrum specCopy = spec;
- spec = null;
- return specCopy;
- }
-
- return spec;
- }
-
- /**
- * Read the entire ms2 file and generates a map from spectrum indexes to file positions of spectra.
- *
- * @param lineReader A reader points to the start of the spectrum.
- * @return A Hashtable object maps a spectrum index into a file position.
- */
- public Map getSpecMetaInfoMap(
- BufferedRandomAccessLineReader lineReader) {
- Hashtable specIndexMap = new Hashtable();
- String buf;
- long offset = 0;
- int specIndex = 0;
-
- SpectrumMetaInfo metaInfo = null;
- while ((buf = lineReader.readLine()) != null) {
- if (buf.startsWith("S")) // scan
- {
- specIndex++;
-
- metaInfo = new SpectrumMetaInfo();
- metaInfo.setPosition(offset);
- metaInfo.setID("index=" + (specIndex - 1));
-
- String[] token = buf.split("\\s+");
- if (token.length < 4) {
- System.err.println("Invalid ms2 file format!");
- System.exit(-1);
- }
- float precursorMz = Float.parseFloat(token[3]);
- metaInfo.setPrecursorMz(precursorMz);
- specIndexMap.put(specIndex, metaInfo);
- }
-
- offset = lineReader.getPosition();
- }
- return specIndexMap;
- }
-
- public static void test() throws Exception {
- String fileName = System.getProperty("user.home") + "/Research/Data/QCShew/QC_Shew_12_02_2_1Aug12_Cougar_12-06-11.ms2";
-
- java.util.Map specIndexPrecursorMzMap = new java.util.HashMap();
- int numSpecs;
-
- numSpecs = 0;
- SpectraMap map = new SpectraMap(fileName, new MS2SpectrumParser());
-
- for (int specIndex : map.getSpecIndexList()) {
- Spectrum spec = map.getSpectrumBySpecIndex(specIndex);
- numSpecs++;
- specIndexPrecursorMzMap.put(spec.getSpecIndex(), spec.getPrecursorPeak().getMz());
- }
- System.out.println("NumSpectra: " + numSpecs);
-
-// Spectrum scan87 = map.getSpectrumBySpecIndex(79);
-// System.out.println("**** " + scan87.getPrecursorPeak().getMz()+" "+scan87.getPrecursorPeak().getCharge());
-
- numSpecs = 0;
- SpectraIterator iterator = new SpectraIterator(fileName, new MS2SpectrumParser());
- while (iterator.hasNext()) {
- Spectrum spec = iterator.next();
- numSpecs++;
-
- Float precursorMz = specIndexPrecursorMzMap.get(spec.getSpecIndex());
-// System.out.println(spec.getPrecursorPeak().getMz()+" "+spec.getCharge()+" "+spec.getSpecIndex()+" "+spec.getScanNum());
- if (precursorMz == null || precursorMz != spec.getPrecursorPeak().getMz()) {
- System.out.println(precursorMz + " != " + spec.getPrecursorPeak().getMz());
- System.exit(0);
- }
- }
-
- System.out.println("NumSpectra: " + numSpecs);
- }
-
- public static void main(String argv[]) throws Exception {
- test();
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/PNNLSpectraIterator.java b/src/main/java/edu/ucsd/msjava/parser/PNNLSpectraIterator.java
deleted file mode 100644
index ecc5709e..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/PNNLSpectraIterator.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import edu.ucsd.msjava.msutil.ScanType;
-import edu.ucsd.msjava.msutil.SpectraIterator;
-import edu.ucsd.msjava.msutil.Spectrum;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Iterator;
-
-public class PNNLSpectraIterator extends SpectraIterator {
-
- private HashMap scanNumScanTypeMap;
-
- public PNNLSpectraIterator(String fileName) throws IOException {
- super(fileName, new PNNLSpectrumParser());
- scanNumScanTypeMap = PNNLSpectrumParser.getScanTypeMap(fileName);
- }
-
- @Override
- public Spectrum next() {
- if (scanNumScanTypeMap == null)
- return super.next();
-
- Spectrum spec = super.next();
- ScanType scanType = scanNumScanTypeMap.get(spec.getScanNum());
- if (scanType != null) {
- spec.setActivationMethod(scanType.getActivationMethod());
- spec.setIsHighPrecision(scanType.isHighPrecision());
- spec.setMsLevel(scanType.getMsLevel());
- spec.setRt(scanType.getScanStartTime());
- spec.setRtIsSeconds(false);
- }
- return spec;
- }
-
- public static void main(String argv[]) throws Exception {
- String fileName = System.getProperty("user.home") + "/Test/Matt/QC_Shew_11_03_200ng_4_23Aug11_Hawk_11-05-04p_dta.txt";
- PNNLSpectraIterator itr = new PNNLSpectraIterator(fileName);
- Iterator specItr = itr.iterator();
- while (specItr.hasNext()) {
- Spectrum spec = specItr.next();
- System.out.println(spec.getScanNum() + "\t" + spec.getActivationMethod());
- }
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/PNNLSpectraMap.java b/src/main/java/edu/ucsd/msjava/parser/PNNLSpectraMap.java
deleted file mode 100644
index 7adf8b94..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/PNNLSpectraMap.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import edu.ucsd.msjava.msutil.ScanType;
-import edu.ucsd.msjava.msutil.SpectraMap;
-import edu.ucsd.msjava.msutil.Spectrum;
-
-import java.util.HashMap;
-
-public class PNNLSpectraMap extends SpectraMap {
-
- private HashMap scanNumScanTypeMap;
-
- public PNNLSpectraMap(String fileName) {
- super(fileName, new PNNLSpectrumParser());
- scanNumScanTypeMap = PNNLSpectrumParser.getScanTypeMap(fileName);
- }
-
- @Override
- public synchronized Spectrum getSpectrumBySpecIndex(int specIndex) {
- if (scanNumScanTypeMap == null)
- return super.getSpectrumBySpecIndex(specIndex);
- else {
- Spectrum spec = super.getSpectrumBySpecIndex(specIndex);
- ScanType scanType = scanNumScanTypeMap.get(spec.getScanNum());
- if (scanType != null) {
- spec.setActivationMethod(scanType.getActivationMethod());
- spec.setIsHighPrecision(scanType.isHighPrecision());
- spec.setMsLevel(scanType.getMsLevel());
- spec.setRt(scanType.getScanStartTime());
- spec.setRtIsSeconds(false);
- }
-
- return spec;
- }
- }
-
- public static void main(String argv[]) throws Exception {
- String fileName = System.getProperty("user.home") + "/Test/Matt/QC_Shew_11_03_200ng_4_23Aug11_Hawk_11-05-04p_dta.txt";
- PNNLSpectraMap map = new PNNLSpectraMap(fileName);
- for (int specIndex : map.getSpecIndexList()) {
- Spectrum spec = map.getSpectrumBySpecIndex(specIndex);
- System.out.println(spec.getScanNum() + "\t" + spec.getActivationMethod());
- }
- }
-
-
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/PNNLSpectrumParser.java b/src/main/java/edu/ucsd/msjava/parser/PNNLSpectrumParser.java
deleted file mode 100644
index 0b3c6e09..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/PNNLSpectrumParser.java
+++ /dev/null
@@ -1,256 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import edu.ucsd.msjava.msutil.*;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.*;
-
-public class PNNLSpectrumParser implements SpectrumParser {
-
- public static final String SCAN_TYPE_FILE_EXTENSION = "_ScanType.txt";
-
- /**
- * Number of scans where we could not determine the scan number
- * This method is required by interface SpectrumParser
- * However, this class does not keep track of spectra without a scan number
- *
- * @return
- */
-
- public long getScanMissingWarningCount() {
- return 0;
- }
- public Spectrum readSpectrum(LineReader lineReader) {
- Spectrum spec = null;
-
- String buf;
- float prevMass = 0;
- boolean isSorted = true;
-
- while ((buf = lineReader.readLine()) != null) {
- if (buf.length() == 0) {
- if (spec != null) {
- if (!isSorted)
- Collections.sort(spec);
- return spec;
- } else
- continue;
- } else if (buf.startsWith("==")) {
- if (spec != null) {
- System.out.println("There must be at least one empty line between spectra: " + buf);
- System.exit(-1);
- }
- int lastDotIndex = buf.lastIndexOf('.');
- int secondLastDotIndex = buf.lastIndexOf('.', lastDotIndex - 1);
- int thirdLastDotIndex = buf.lastIndexOf('.', secondLastDotIndex - 1);
- int fourthLastDotIndex = buf.lastIndexOf('.', thirdLastDotIndex - 1);
-
- int scanNum = Integer.parseInt(buf.substring(fourthLastDotIndex + 1, thirdLastDotIndex));
-
- String annotation = buf;
- // first line of a spectrum
- buf = lineReader.readLine();
- if (buf == null || buf.trim().length() == 0) {
- System.out.println("Error while parsing _Dta.txt file: " + annotation);
- System.out.println("No spectrum!");
- System.exit(-1);
- }
-
- spec = new Spectrum();
- String[] token = buf.split("\\s+");
- float mPlusH = Float.parseFloat(token[0]);
- int charge = Integer.parseInt(token[1].substring(token[1].indexOf('=') + 1));
- float precursorMz = (mPlusH - (float) Composition.ChargeCarrierMass()) / charge + (float) Composition.ChargeCarrierMass();
- spec.setPrecursor(new Peak(precursorMz, 0, charge));
- spec.setScanNum(scanNum);
- } else if (Character.isDigit(buf.charAt(0))) // peak
- {
- if (spec == null) {
- System.out.println("Error while parsing _Dta.txt file.");
- System.out.println("Header line is missing: " + buf);
- System.exit(-1);
- }
- String[] token2 = buf.split("\\s+");
- if (token2.length != 2)
- continue;
- float mass = Float.parseFloat(token2[0]);
- if (isSorted && mass < prevMass)
- isSorted = false;
-
- float intensity = Float.parseFloat(token2[1]);
- spec.add(new Peak(mass, intensity, 1));
- prevMass = mass;
- }
- }
- return spec;
- }
-
- @Override
- public Map getSpecMetaInfoMap(BufferedRandomAccessLineReader lineReader) {
- Hashtable specIndexMap = new Hashtable();
- String buf;
- long offset = 0;
- int specIndex = 0;
- while ((buf = lineReader.readLine()) != null) {
- if (buf.startsWith("==")) {
-// specIndexMap.put(++specIndex, offset);
- ++specIndex;
- int lastDotIndex = buf.lastIndexOf('.');
- int secondLastDotIndex = buf.lastIndexOf('.', lastDotIndex - 1);
- int thirdLastDotIndex = buf.lastIndexOf('.', secondLastDotIndex - 1);
- int fourthLastDotIndex = buf.lastIndexOf('.', thirdLastDotIndex - 1);
-
- String annotation = buf;
- // first line of a spectrum
- buf = lineReader.readLine();
- if (buf == null || buf.trim().length() == 0) {
- System.out.println("Error while parsing _Dta.txt file: " + annotation);
- System.out.println("No spectrum!");
- System.exit(-1);
- }
-
- String[] token = buf.split("\\s+");
- float mPlusH = Float.parseFloat(token[0]);
- int charge = Integer.parseInt(token[1].substring(token[1].indexOf('=') + 1));
- float precursorMz = (mPlusH - (float) Composition.ChargeCarrierMass()) / charge + (float) Composition.ChargeCarrierMass();
-
- SpectrumMetaInfo metaInfo = new SpectrumMetaInfo();
- metaInfo.setID("index=" + (specIndex - 1));
- metaInfo.setPrecursorMz(precursorMz);
- metaInfo.setPosition(offset);
- specIndexMap.put(specIndex, metaInfo);
- }
- offset = lineReader.getPosition();
- }
- return specIndexMap;
- }
-
-// static class ScanType
-// {
-// public ScanType(ActivationMethod activationMethod,
-// boolean isHighPrecision) {
-// this.activationMethod = activationMethod;
-// this.isHighPrecision = isHighPrecision;
-// }
-//
-// ActivationMethod getActivationMethod() {
-// return activationMethod;
-// }
-// boolean isHighPrecision() {
-// return isHighPrecision;
-// }
-//
-// private ActivationMethod activationMethod;
-// private boolean isHighPrecision;
-// }
-
- static HashMap getScanTypeMap(String fileName) {
- File specFile = new File(fileName);
- String scanTypeFileName =
- specFile.getAbsoluteFile().getParentFile().getPath()
- + File.separator
- + specFile.getName().substring(0, specFile.getName().lastIndexOf('_'))
- + PNNLSpectrumParser.SCAN_TYPE_FILE_EXTENSION;
- File scanTypeFile = new File(scanTypeFileName);
-
- if (!scanTypeFile.exists())
- return null;
-
- HashMap scanNumScanTypeMap = new HashMap();
-
- BufferedLineReader in = null;
- try {
- in = new BufferedLineReader(scanTypeFile.getPath());
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- String s;
-
- s = in.readLine(); // header
- boolean hasScanTimes = false;
- String[] hTokens = s.split("\t");
- if (hTokens.length > 3 && hTokens[3].toLowerCase().contains("time")) {
- hasScanTimes = true;
- }
-
- while ((s = in.readLine()) != null) {
- String[] token = s.split("\t");
- if (token.length < 3)
- continue;
-
- int scanNum = Integer.parseInt(token[0]);
- String scanType = token[1].toLowerCase();
-
- ActivationMethod method = null;
- if (scanType.contains("etcid"))
- method = ActivationMethod.ETD;
- else if (scanType.contains("ethcd"))
- method = ActivationMethod.ETD;
- else if (scanType.contains("cid"))
- method = ActivationMethod.CID;
- else if (scanType.contains("etd"))
- method = ActivationMethod.ETD;
- else if (scanType.contains("hcd"))
- method = ActivationMethod.HCD;
- else if (scanType.contains("pqd"))
- method = ActivationMethod.PQD;
-
- boolean isHighPrecision = false;
- if (scanType.contains("hms"))
- isHighPrecision = true;
-
- int msLevel = Integer.parseInt(token[2]);
-
- float scanTime = -1;
- if (hasScanTimes && token.length > 3) {
- scanTime = Float.parseFloat(token[3]);
- }
-
- if (method != null) {
- scanNumScanTypeMap.put(scanNum, new ScanType(method, isHighPrecision, msLevel, scanTime));
- }
- }
-
- if (in != null) {
- try {
- in.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return scanNumScanTypeMap;
- }
-
- public static void main(String argv[]) throws Exception {
- long time = System.currentTimeMillis();
- String fileName = System.getProperty("user.home") + "/Research/ToolDistribution/PNNLTest/QC_Shew_08_04_pt5_b_22Jan09_Owl_09-01-04_dta.txt";
- SpectraIterator itr = new SpectraIterator(fileName, new PNNLSpectrumParser());
- int numSpecs = 0;
- HashSet scanNumSet = new HashSet();
- while (itr.hasNext()) {
- Spectrum spec = itr.next();
- numSpecs++;
- if (scanNumSet.contains(spec.getScanNum())) {
- System.out.println(spec.getScanNum());
- } else
- scanNumSet.add(spec.getScanNum());
-// System.out.println(spec+ "\t" + spec.getScanNum()+"\t"+(spec.getPrecursorMass()+(float)Composition.ChargeCarrierMass)+"\t"+spec.getCharge());
- }
- System.out.println("NumSpecs: " + numSpecs);
- System.out.println("Time: " + (System.currentTimeMillis() - time));
-
- time = System.currentTimeMillis();
- SpectraMap map = new SpectraMap(fileName, new PNNLSpectrumParser());
- numSpecs = 0;
- for (int specIndex : map.getSpecIndexList()) {
- Spectrum spec = map.getSpectrumBySpecIndex(specIndex);
- numSpecs++;
-// System.out.println(spec+ "\t" + spec.getScanNum()+"\t"+(spec.getPrecursorMass()+(float)Composition.ChargeCarrierMass)+"\t"+spec.getCharge());
- }
- System.out.println("NumSpecs: " + numSpecs);
- System.out.println("Time: " + (System.currentTimeMillis() - time));
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/PklSpectrumParser.java b/src/main/java/edu/ucsd/msjava/parser/PklSpectrumParser.java
deleted file mode 100644
index 4129a936..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/PklSpectrumParser.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import edu.ucsd.msjava.msutil.*;
-
-import java.util.Collections;
-import java.util.Hashtable;
-import java.util.Map;
-
-/**
- * A class that parses Pkl format
- *
- * @author sangtaekim
- */
-public class PklSpectrumParser implements SpectrumParser {
-
- /**
- * Number of scans where we could not determine the scan number
- * This method is required by interface SpectrumParser
- * However, this class does not keep track of spectra without a scan number
- *
- * @return
- */
- public long getScanMissingWarningCount() {
- return 0;
- }
-
- /**
- * Reads a spectrum from pkl file and returns it.
- *
- * @param lineReader A LineReader object points to the start of a spectrum.
- * @return a spectrum object.
- */
- public Spectrum readSpectrum(LineReader lineReader) {
- Spectrum spec = null;
-
- boolean sorted = true;
- float prevMass = 0;
-
- String buf;
- while ((buf = lineReader.readLine()) != null) {
- String[] token = buf.split("\\s+");
- if (token.length == 3) // start of a spectrum
- {
- float precursorMz = Float.parseFloat(token[0]);
- float precursorIntensity = Float.parseFloat(token[1]);
- int charge = Integer.parseInt(token[2]);
- spec = new Spectrum(precursorMz, charge, precursorIntensity);
- } else if (token.length == 2) // a peak
- {
- assert (spec != null);
- float mass = Float.parseFloat(token[0]);
- if (sorted && mass < prevMass)
- sorted = false;
- else
- prevMass = mass;
-// if(token[1].endsWith("null"))
-// token[1] = token[1].substring(0, token[1].lastIndexOf("null"));
- float intensity = Float.parseFloat(token[1]);
- spec.add(new Peak(mass, intensity, 1));
- } else // end of a spectrum
- {
- if (spec != null) {
- if (!sorted)
- Collections.sort(spec);
- return spec;
- }
- }
- }
- return spec;
- }
-
- /**
- * Read the entire pkl file and generates a map from spectrum indexes to file positions of spectra.
- *
- * @param lineReader A reader points to the start of the spectrum.
- * @return A Hashtable object maps a spectrum index into a file position.
- */
- public Map getSpecMetaInfoMap(
- BufferedRandomAccessLineReader lineReader) {
- Hashtable specIndexMap = new Hashtable();
- String buf;
- long offset = 0;
- int specIndex = 0;
- while ((buf = lineReader.readLine()) != null) {
- String[] token = buf.split("\\s+");
- if (token.length == 3) // start of a spectrum
- {
-// specIndexMap.put(++specIndex, offset);
- ++specIndex;
- float precursorMz = Float.parseFloat(token[0]);
- SpectrumMetaInfo metaInfo = new SpectrumMetaInfo();
- metaInfo.setID("index=" + (specIndex - 1));
- metaInfo.setPrecursorMz(precursorMz);
- metaInfo.setPosition(offset);
- specIndexMap.put(specIndex, metaInfo);
- }
-
- offset = lineReader.getPosition();
- }
- return specIndexMap;
- }
-
- public static void test() throws Exception {
- String fileName = System.getProperty("user.home") + "/Research/ToolDistribution/RefTest/SpecFormatTest/TestSpectra.pkl";
- SpectraIterator iterator = new SpectraIterator(fileName, new PklSpectrumParser());
- int numSpecs = 0;
- while (iterator.hasNext()) {
- Spectrum spec = iterator.next();
- numSpecs++;
- System.out.println(spec.getPrecursorPeak().getMz() + " " + spec.getCharge() + " " + spec.getSpecIndex() + " " + spec.getScanNum());
- }
- System.out.println("NumSpectra: " + numSpecs);
-
- numSpecs = 0;
- SpectraMap map = new SpectraMap(fileName, new PklSpectrumParser());
- for (int specIndex : map.getSpecIndexList()) {
- Spectrum spec = map.getSpectrumBySpecIndex(specIndex);
- numSpecs++;
- System.out.println(spec.getPrecursorPeak().getMz() + " " + spec.getCharge() + " " + spec.getSpecIndex() + " " + spec.getScanNum());
- }
- System.out.println("NumSpectra: " + numSpecs);
- }
-
- public static void main(String argv[]) throws Exception {
- test();
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/SPTxtParser.java b/src/main/java/edu/ucsd/msjava/parser/SPTxtParser.java
deleted file mode 100644
index ecd13ae1..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/SPTxtParser.java
+++ /dev/null
@@ -1,140 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import edu.ucsd.msjava.msutil.*;
-
-import java.util.Hashtable;
-import java.util.Map;
-
-public class SPTxtParser implements SpectrumParserWithTitle {
-
- /**
- * Number of scans where we could not determine the scan number
- * This method is required by interface SpectrumParser
- * However, this class does not keep track of spectra without a scan number
- * @return
- */
- public long getScanMissingWarningCount()
- {
- return 0;
- }
-
- public Spectrum readSpectrum(LineReader lineReader) {
- Spectrum spec = null;
-
- String buf;
-
- buf = lineReader.readLine(); // Name: n[43]GAAA....MAR/1
- String[] nameToken = buf.split("\\s+");
- String name = nameToken[1];
- Pair namePair = parseSPTXTName(name);
-
- String pepSeq = namePair.getFirst();
- int precursorCharge = namePair.getSecond();
-
- spec = new Spectrum();
- Peptide pep = new Peptide(pepSeq, AminoAcidSet.getStandardAminoAcidSet());
- spec.setAnnotation(pep);
- spec.setTitle(namePair.getFirst() + ":" + namePair.getSecond());
-
- float precursorMz = 0;
- boolean parse = false;
- while ((buf = lineReader.readLine()) != null) {
- if (buf.startsWith("NumPeaks:")) {
- parse = true;
- } else if (buf.startsWith("PrecursorMZ")) {
- String[] token = buf.split("\\s+");
- precursorMz = Float.parseFloat(token[1]);
- } else if (buf.trim().length() == 0) {
- assert (spec != null);
- spec.setPrecursor(new Peak(precursorMz, 0, precursorCharge));
- return spec;
- } else if (parse && Character.isDigit(buf.charAt(0))) {
- String[] token = buf.split("\\s+");
- if (token.length < 2)
- continue;
- float mass = Float.parseFloat(token[0]);
- float intensity = Float.parseFloat(token[1]);
- spec.add(new Peak(mass, intensity, 1));
- }
- }
- return null;
- }
-
- public Map getSpecMetaInfoMap(BufferedRandomAccessLineReader lineReader) {
- Hashtable specIndexMap = new Hashtable();
- String buf;
- long offset = 0;
- int specIndex = 0;
- SpectrumMetaInfo metaInfo = null;
- while ((buf = lineReader.readLine()) != null) {
- if (buf.startsWith("Name:")) {
- specIndex++;
- metaInfo = new SpectrumMetaInfo();
- metaInfo.setID("index=" + (specIndex - 1));
- metaInfo.setPosition(offset);
- specIndexMap.put(specIndex, metaInfo);
- } else if (buf.startsWith("PrecursorMZ")) {
- String[] token = buf.split("\\s+");
- float precursorMz = Float.parseFloat(token[1]);
- metaInfo.setPrecursorMz(precursorMz);
- }
- offset = lineReader.getPosition();
- }
- return specIndexMap;
- }
-
- public Hashtable getTitleToSpecIndexMap(BufferedRandomAccessLineReader lineReader) {
- Hashtable titleToSpecIndexMap = new Hashtable();
- String buf;
- int specIndex = 0;
- while ((buf = lineReader.readLine()) != null) {
- if (buf.startsWith("Name:")) {
- specIndex++;
- Pair pair = parseSPTXTName(buf.split("\\s+")[1]);
- titleToSpecIndexMap.put(pair.getFirst() + ":" + pair.getSecond(), specIndex);
- }
- }
- return titleToSpecIndexMap;
- }
-
- public static Pair parseSPTXTName(String name) {
- String annotationStr = name.substring(0, name.lastIndexOf('/'));
- StringBuffer pepBuf = new StringBuffer();
- int startIndex = 0;
- if (annotationStr.startsWith("n[43]")) {
- pepBuf.append("+42");
- startIndex = 5;
- }
- char prevAA = '\0';
- for (int i = startIndex; i < annotationStr.length(); i++) {
- char c = annotationStr.charAt(i);
- if (Character.isUpperCase(c))
- pepBuf.append(c);
- else if (c == '[') {
- StringBuffer massBuf = new StringBuffer();
- while (annotationStr.charAt(++i) != ']')
- massBuf.append(annotationStr.charAt(i));
- int mass = Integer.parseInt(massBuf.toString());
- int residueMass = AminoAcidSet.getStandardAminoAcidSet().getAminoAcid(prevAA).getNominalMass();
- int delMass = mass - residueMass;
- if (delMass > 0)
- pepBuf.append("+");
- pepBuf.append(delMass);
- }
- prevAA = c;
- }
-
- int charge = Integer.parseInt(name.substring(name.lastIndexOf('/') + 1));
-
- return new Pair(pepBuf.toString(), charge);
- }
-
- public static void main(String argv[]) throws Exception {
- String fileName = "/home/sangtaekim/Research/Data/NISTLib/human_targetdecoy_spectrast.sptxt";
- SpectraMapByTitle map = new SpectraMapByTitle(fileName, new SPTxtParser());
- System.out.println("Parsing complete.");
- Spectrum spec = map.getSpectrumByTitle("+42AAAAAAGAGPEM+16VRGQVFDVGPR:3");
- System.out.println(spec.getSpecIndex() + "\t" + spec.size());
-
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/SpectrumParserWithTitle.java b/src/main/java/edu/ucsd/msjava/parser/SpectrumParserWithTitle.java
deleted file mode 100644
index 2be75af0..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/SpectrumParserWithTitle.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import java.util.Hashtable;
-
-
-public interface SpectrumParserWithTitle extends SpectrumParser {
- Hashtable getTitleToSpecIndexMap(BufferedRandomAccessLineReader lineReader); // title -> specIndex
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/TSVParser.java b/src/main/java/edu/ucsd/msjava/parser/TSVParser.java
deleted file mode 100644
index 6d661ad8..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/TSVParser.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-
-
-public class TSVParser {
- public TSVParser() {
-
- }
-
- private HashMap> map = new HashMap>();
-
- public ArrayList getList(String label) {
- return map.get(label);
- }
-
- public void parse(String fileName) {
- BufferedLineReader in = null;
- try {
- in = new BufferedLineReader(fileName);
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- String labelRow = in.readLine();
- String[] labelArr = labelRow.split("\t");
- for (String label : labelArr)
- map.put(label, new ArrayList());
-
- String s;
- while ((s = in.readLine()) != null) {
- if (s.startsWith("#"))
- continue;
- String[] token = s.split("\t");
- if (token.length != labelArr.length)
- continue;
- for (int i = 0; i < labelArr.length; i++)
- map.get(labelArr[i]).add(token[i]);
- }
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/parser/TSVResultParser.java b/src/main/java/edu/ucsd/msjava/parser/TSVResultParser.java
deleted file mode 100644
index 3cc650cc..00000000
--- a/src/main/java/edu/ucsd/msjava/parser/TSVResultParser.java
+++ /dev/null
@@ -1,133 +0,0 @@
-package edu.ucsd.msjava.parser;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-public class TSVResultParser {
- private File tsvFile;
- private Set pepSet;
- private Set scanSet;
- private Set idSet;
- private Map idToSpecEValue;
-
- public TSVResultParser(File tsvFile) {
- this.tsvFile = tsvFile;
- }
-
- public Set getPepSet() {
- return pepSet;
- }
-
- public Set getScanSet() {
- return scanSet;
- }
-
- public Set getIdSet() {
- return idSet;
- }
-
- public Float getSpecEValue(String id) {
- return idToSpecEValue.get(id);
- }
-
- public String parse(float fdrThreshold) {
- BufferedLineReader in = null;
- try {
- in = new BufferedLineReader(tsvFile.getPath());
- } catch (IOException e) {
- e.printStackTrace();
- }
- String header = in.readLine();
- if (!header.startsWith("#") && !header.startsWith("Result"))
- return "No header!";
-
- String[] headerToken = header.split("\t");
- int specQValueColNum = -1;
- int pepQValueColNum = -1;
- int pepColNum = -1;
- int scanNumCol = -1;
- int idCol = -1;
- int specEValueCol = -1;
- for (int i = 0; i < headerToken.length; i++) {
- if (headerToken[i].equalsIgnoreCase("FDR") || headerToken[i].equalsIgnoreCase("QValue") || headerToken[i].equalsIgnoreCase("q-value"))
- specQValueColNum = i;
- if (headerToken[i].equalsIgnoreCase("PepFDR") || headerToken[i].equalsIgnoreCase("PepQValue"))
- pepQValueColNum = i;
- if (headerToken[i].equalsIgnoreCase("Peptide") || headerToken[i].equalsIgnoreCase("Annotation"))
- pepColNum = i;
- if (headerToken[i].equalsIgnoreCase("ScanNum") || headerToken[i].equalsIgnoreCase("Scan#") || headerToken[i].equalsIgnoreCase("Scan"))
- scanNumCol = i;
- if (headerToken[i].equalsIgnoreCase("SpecID"))
- idCol = i;
- if (headerToken[i].equalsIgnoreCase("SpecEValue") || headerToken[i].equalsIgnoreCase("SpecProb"))
- specEValueCol = i;
- }
- if (specQValueColNum < 0)
- return "QValue column is missing!";
- if (pepQValueColNum < 0)
- return "PepQValue column is missing!";
- if (pepColNum < 0)
- return "Annotation column is missing!";
- if (scanNumCol < 0)
- return "Scan column is missing!";
- if (idCol < 0)
- return "SpecID column is missing!";
- if (specEValueCol < 0)
- return "SpecEValue column is missing!";
-
- String s;
- pepSet = new HashSet();
- scanSet = new HashSet();
- idSet = new HashSet();
- idToSpecEValue = new HashMap();
- while ((s = in.readLine()) != null) {
- if (s.startsWith("#"))
- continue;
- String[] token = s.split("\t");
- if (token.length <= specQValueColNum || token.length <= pepQValueColNum || token.length <= pepColNum
- || token.length <= idCol || token.length <= specEValueCol)
- continue;
- double specQValue = Double.parseDouble(token[specQValueColNum]);
- double pepQValue = Double.parseDouble(token[pepQValueColNum]);
- float specEValue = Float.parseFloat(token[specEValueCol]);
-// if(token[scanNumCol].equals("6804"))
-// System.out.println("Debug");
- idToSpecEValue.put(token[idCol], specEValue);
-
- if (specQValue <= fdrThreshold) {
- scanSet.add(token[scanNumCol]);
- idSet.add(token[idCol]);
- }
- if (pepQValue <= fdrThreshold) {
- String annotation = token[pepColNum];
-
- String pepStr;
-
- if (annotation.matches("[A-Z\\-_]?\\..+\\.[A-Z\\-_]?"))
- pepStr = annotation.substring(annotation.indexOf('.') + 1, annotation.lastIndexOf('.'));
- else
- pepStr = annotation;
-
- StringBuffer unmodStr = new StringBuffer();
- for (int i = 0; i < pepStr.length(); i++)
- if (Character.isLetter(pepStr.charAt(i)))
- unmodStr.append(pepStr.charAt(i));
-
- pepSet.add(unmodStr.toString());
- }
- }
-
- try {
- in.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- return null;
- }
-}
diff --git a/src/test/java/msgfplus/TestMisc.java b/src/test/java/msgfplus/TestMisc.java
index 7af58d30..467af263 100644
--- a/src/test/java/msgfplus/TestMisc.java
+++ b/src/test/java/msgfplus/TestMisc.java
@@ -23,7 +23,6 @@
import edu.ucsd.msjava.msutil.Protocol;
import edu.ucsd.msjava.msutil.SpectraAccessor;
import edu.ucsd.msjava.msutil.Spectrum;
-import edu.ucsd.msjava.parser.TSVParser;
public class TestMisc {
@@ -145,79 +144,6 @@ public void generateTRexPRMSpectrum()
System.out.println("END IONS");
}
- @Test
- @Ignore
- public void generateTRexPRMSpectra()
- {
- File outputFile = new File("D:\\Research\\Data\\TRex\\MaxCharge4\\TRex48216_Vectors.txt");
- PrintStream out = null;
- try {
- out = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- }
-
- AminoAcidSet aaSet = AminoAcidSet.getStandardAminoAcidSet();
- File idFile = new File("D:\\Research\\Data\\TRex\\MaxCharge4\\NoDecoy.tsv");
- HashMap titleToNominalMass = new HashMap();
- TSVParser parser = new TSVParser();
- parser.parse(idFile.getPath());
- ArrayList titleList = parser.getList("Title");
- ArrayList peptideList = parser.getList("Peptide");
- ArrayList specEValueList = parser.getList("SpecEValue");
- for(int i=0; i 1E-10) continue;
- Peptide peptide = new Peptide(peptideList.get(i), aaSet);
- int nominalMass = peptide.getNominalMass();
- String title = titleList.get(i);
- titleToNominalMass.put(title, nominalMass);
- }
-
- NewRankScorer scorer = NewScorerFactory.get(ActivationMethod.CID, InstrumentType.LOW_RESOLUTION_LTQ, Enzyme.TRYPSIN, Protocol.STANDARD);
- scorer.doNotUseError();
-
- File specFile = new File("D:\\Research\\Data\\TRex\\TRex48216.mgf");
- SpectraAccessor accessor = new SpectraAccessor(specFile);
- Iterator itr = accessor.getSpecItr();
- while(itr.hasNext())
- {
- Spectrum spec = accessor.getSpecItr().next();
- String title = spec.getTitle();
- int nominalMass;
- if(titleToNominalMass.containsKey(title)) nominalMass = titleToNominalMass.get(title);
- else nominalMass = NominalMass.toNominalMass(spec.getPrecursorMass()) - 18;
-
- NewScoredSpectrum scoredSpec = scorer.getScoredSpectrum(spec);
-
- // PRM spectrum
- //out.println("BEGIN IONS");
- out.println("SCAN="+spec.getScanNum());
-// if(spec.getTitle() != null)
-// out.println(" " + spec.getTitle());
-// else
-// out.println();
-// if(spec.getAnnotation() != null)
-// out.println("SEQ=" + spec.getAnnotationStr());
-// out.println("PEPMASS=" + spec.getPrecursorPeak().getMz());
- out.println("PEPTIDE_MASS=" + nominalMass);
-// out.println("SCANS=" + spec.getScanNum());
-// out.println("CHARGE="+spec.getCharge()+"+");
-
-// int peptideNominalMass = 1272;
- for(int m=1; m
Date: Mon, 27 Apr 2026 06:47:40 +0100
Subject: [PATCH 21/34] refactor: rename parser/ package to mgf/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Pure file-move + package/import update; no behaviour change.
- git mv the 6 remaining files under parser/ into mgf/
(BufferedLineReader, BufferedRandomAccessLineReader, LineReader,
MgfSpectrumParser, SpectrumParser, UnicodeBOMInputStream)
- Update package declaration in each moved file from
edu.ucsd.msjava.parser → edu.ucsd.msjava.mgf
- Update import edu.ucsd.msjava.parser.* → edu.ucsd.msjava.mgf.*
across 13 callers in fdr/, mzml/, msutil/, msscorer/, msdbsearch/
---
src/main/java/edu/ucsd/msjava/fdr/ComputeQValue.java | 2 +-
.../edu/ucsd/msjava/{parser => mgf}/BufferedLineReader.java | 2 +-
.../{parser => mgf}/BufferedRandomAccessLineReader.java | 2 +-
.../java/edu/ucsd/msjava/{parser => mgf}/LineReader.java | 2 +-
.../edu/ucsd/msjava/{parser => mgf}/MgfSpectrumParser.java | 2 +-
.../edu/ucsd/msjava/{parser => mgf}/SpectrumParser.java | 2 +-
.../ucsd/msjava/{parser => mgf}/UnicodeBOMInputStream.java | 2 +-
src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java | 2 +-
.../java/edu/ucsd/msjava/msdbsearch/LibraryScanner.java | 2 +-
.../edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java | 2 +-
.../msscorer/ScoringParameterGeneratorWithErrors.java | 2 +-
src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java | 2 +-
src/main/java/edu/ucsd/msjava/msutil/SpecKey.java | 2 +-
src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java | 4 ++--
src/main/java/edu/ucsd/msjava/msutil/SpectraContainer.java | 2 +-
src/main/java/edu/ucsd/msjava/msutil/SpectraIterator.java | 6 +++---
src/main/java/edu/ucsd/msjava/msutil/SpectraMap.java | 4 ++--
src/main/java/edu/ucsd/msjava/msutil/UserParam.java | 2 +-
.../java/edu/ucsd/msjava/mzml/StaxMzMLSpectraIterator.java | 2 +-
19 files changed, 23 insertions(+), 23 deletions(-)
rename src/main/java/edu/ucsd/msjava/{parser => mgf}/BufferedLineReader.java (94%)
rename src/main/java/edu/ucsd/msjava/{parser => mgf}/BufferedRandomAccessLineReader.java (99%)
rename src/main/java/edu/ucsd/msjava/{parser => mgf}/LineReader.java (63%)
rename src/main/java/edu/ucsd/msjava/{parser => mgf}/MgfSpectrumParser.java (99%)
rename src/main/java/edu/ucsd/msjava/{parser => mgf}/SpectrumParser.java (94%)
rename src/main/java/edu/ucsd/msjava/{parser => mgf}/UnicodeBOMInputStream.java (99%)
diff --git a/src/main/java/edu/ucsd/msjava/fdr/ComputeQValue.java b/src/main/java/edu/ucsd/msjava/fdr/ComputeQValue.java
index 28196b04..d136b894 100644
--- a/src/main/java/edu/ucsd/msjava/fdr/ComputeQValue.java
+++ b/src/main/java/edu/ucsd/msjava/fdr/ComputeQValue.java
@@ -1,6 +1,6 @@
package edu.ucsd.msjava.fdr;
-import edu.ucsd.msjava.parser.BufferedLineReader;
+import edu.ucsd.msjava.mgf.BufferedLineReader;
import edu.ucsd.msjava.cli.MSGFPlus;
import java.io.File;
diff --git a/src/main/java/edu/ucsd/msjava/parser/BufferedLineReader.java b/src/main/java/edu/ucsd/msjava/mgf/BufferedLineReader.java
similarity index 94%
rename from src/main/java/edu/ucsd/msjava/parser/BufferedLineReader.java
rename to src/main/java/edu/ucsd/msjava/mgf/BufferedLineReader.java
index 3eddae90..d068aed6 100644
--- a/src/main/java/edu/ucsd/msjava/parser/BufferedLineReader.java
+++ b/src/main/java/edu/ucsd/msjava/mgf/BufferedLineReader.java
@@ -1,4 +1,4 @@
-package edu.ucsd.msjava.parser;
+package edu.ucsd.msjava.mgf;
import java.io.*;
diff --git a/src/main/java/edu/ucsd/msjava/parser/BufferedRandomAccessLineReader.java b/src/main/java/edu/ucsd/msjava/mgf/BufferedRandomAccessLineReader.java
similarity index 99%
rename from src/main/java/edu/ucsd/msjava/parser/BufferedRandomAccessLineReader.java
rename to src/main/java/edu/ucsd/msjava/mgf/BufferedRandomAccessLineReader.java
index 3216e238..a3422380 100644
--- a/src/main/java/edu/ucsd/msjava/parser/BufferedRandomAccessLineReader.java
+++ b/src/main/java/edu/ucsd/msjava/mgf/BufferedRandomAccessLineReader.java
@@ -1,4 +1,4 @@
-package edu.ucsd.msjava.parser;
+package edu.ucsd.msjava.mgf;
import org.apache.commons.lang3.tuple.Pair;
diff --git a/src/main/java/edu/ucsd/msjava/parser/LineReader.java b/src/main/java/edu/ucsd/msjava/mgf/LineReader.java
similarity index 63%
rename from src/main/java/edu/ucsd/msjava/parser/LineReader.java
rename to src/main/java/edu/ucsd/msjava/mgf/LineReader.java
index c0f31e74..f0217a4a 100644
--- a/src/main/java/edu/ucsd/msjava/parser/LineReader.java
+++ b/src/main/java/edu/ucsd/msjava/mgf/LineReader.java
@@ -1,4 +1,4 @@
-package edu.ucsd.msjava.parser;
+package edu.ucsd.msjava.mgf;
public interface LineReader {
String readLine();
diff --git a/src/main/java/edu/ucsd/msjava/parser/MgfSpectrumParser.java b/src/main/java/edu/ucsd/msjava/mgf/MgfSpectrumParser.java
similarity index 99%
rename from src/main/java/edu/ucsd/msjava/parser/MgfSpectrumParser.java
rename to src/main/java/edu/ucsd/msjava/mgf/MgfSpectrumParser.java
index e805a781..e8ed1e80 100644
--- a/src/main/java/edu/ucsd/msjava/parser/MgfSpectrumParser.java
+++ b/src/main/java/edu/ucsd/msjava/mgf/MgfSpectrumParser.java
@@ -1,4 +1,4 @@
-package edu.ucsd.msjava.parser;
+package edu.ucsd.msjava.mgf;
import edu.ucsd.msjava.msutil.*;
diff --git a/src/main/java/edu/ucsd/msjava/parser/SpectrumParser.java b/src/main/java/edu/ucsd/msjava/mgf/SpectrumParser.java
similarity index 94%
rename from src/main/java/edu/ucsd/msjava/parser/SpectrumParser.java
rename to src/main/java/edu/ucsd/msjava/mgf/SpectrumParser.java
index f659b055..86856b18 100644
--- a/src/main/java/edu/ucsd/msjava/parser/SpectrumParser.java
+++ b/src/main/java/edu/ucsd/msjava/mgf/SpectrumParser.java
@@ -1,4 +1,4 @@
-package edu.ucsd.msjava.parser;
+package edu.ucsd.msjava.mgf;
import edu.ucsd.msjava.msutil.Spectrum;
import edu.ucsd.msjava.msutil.SpectrumMetaInfo;
diff --git a/src/main/java/edu/ucsd/msjava/parser/UnicodeBOMInputStream.java b/src/main/java/edu/ucsd/msjava/mgf/UnicodeBOMInputStream.java
similarity index 99%
rename from src/main/java/edu/ucsd/msjava/parser/UnicodeBOMInputStream.java
rename to src/main/java/edu/ucsd/msjava/mgf/UnicodeBOMInputStream.java
index 87dce4d1..67a70b53 100644
--- a/src/main/java/edu/ucsd/msjava/parser/UnicodeBOMInputStream.java
+++ b/src/main/java/edu/ucsd/msjava/mgf/UnicodeBOMInputStream.java
@@ -1,6 +1,6 @@
// (‑●‑●)> released under the WTFPL v2 license, by Gregory Pakosz (@gpakosz)
-package edu.ucsd.msjava.parser;
+package edu.ucsd.msjava.mgf;
import java.io.IOException;
import java.io.InputStream;
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java b/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java
index d16d4524..2ac946af 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/DBScanner.java
@@ -6,7 +6,7 @@
import edu.ucsd.msjava.msscorer.SimpleDBSearchScorer;
import edu.ucsd.msjava.msutil.*;
import edu.ucsd.msjava.msutil.Modification.Location;
-import edu.ucsd.msjava.parser.BufferedLineReader;
+import edu.ucsd.msjava.mgf.BufferedLineReader;
import edu.ucsd.msjava.sequences.Constants;
import java.io.*;
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/LibraryScanner.java b/src/main/java/edu/ucsd/msjava/msdbsearch/LibraryScanner.java
index 5f6821a7..5f7fb7a8 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/LibraryScanner.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/LibraryScanner.java
@@ -4,7 +4,7 @@
import edu.ucsd.msjava.msscorer.SimpleDBSearchScorer;
import edu.ucsd.msjava.msutil.*;
import edu.ucsd.msjava.msutil.Modification.Location;
-import edu.ucsd.msjava.parser.BufferedLineReader;
+import edu.ucsd.msjava.mgf.BufferedLineReader;
import java.io.FileNotFoundException;
import java.io.IOException;
diff --git a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java b/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java
index 1c570c37..62fee4b4 100644
--- a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java
+++ b/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java
@@ -5,7 +5,7 @@
import edu.ucsd.msjava.msgf.Tolerance;
import edu.ucsd.msjava.msscorer.NewScorerFactory.SpecDataType;
import edu.ucsd.msjava.msutil.*;
-import edu.ucsd.msjava.parser.MgfSpectrumParser;
+import edu.ucsd.msjava.mgf.MgfSpectrumParser;
import java.io.File;
import java.util.*;
diff --git a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java b/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java
index d2891aed..8cedf8e6 100644
--- a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java
+++ b/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java
@@ -7,7 +7,7 @@
import edu.ucsd.msjava.msscorer.NewScorerFactory.SpecDataType;
import edu.ucsd.msjava.msutil.*;
import edu.ucsd.msjava.msutil.IonType.PrefixIon;
-import edu.ucsd.msjava.parser.MgfSpectrumParser;
+import edu.ucsd.msjava.mgf.MgfSpectrumParser;
import java.io.File;
import java.util.*;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java b/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java
index ee05daa9..102961bf 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java
@@ -3,7 +3,7 @@
import edu.ucsd.msjava.cli.MSGFPlusOptions;
import edu.ucsd.msjava.msdbsearch.SearchParams;
import edu.ucsd.msjava.msutil.Modification.Location;
-import edu.ucsd.msjava.parser.BufferedLineReader;
+import edu.ucsd.msjava.mgf.BufferedLineReader;
import java.io.File;
import java.io.IOException;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java b/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java
index b222c8be..c03a0ee0 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java
@@ -1,6 +1,6 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.parser.SpectrumParser;
+import edu.ucsd.msjava.mgf.SpectrumParser;
import java.util.ArrayList;
import java.util.Collections;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java b/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java
index 56b51ef3..223644b4 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpectraAccessor.java
@@ -3,8 +3,8 @@
import edu.ucsd.msjava.mzml.StaxMzMLParser;
import edu.ucsd.msjava.mzml.StaxMzMLSpectraIterator;
import edu.ucsd.msjava.mzml.StaxMzMLSpectraMap;
-import edu.ucsd.msjava.parser.MgfSpectrumParser;
-import edu.ucsd.msjava.parser.SpectrumParser;
+import edu.ucsd.msjava.mgf.MgfSpectrumParser;
+import edu.ucsd.msjava.mgf.SpectrumParser;
import java.io.File;
import java.io.IOException;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpectraContainer.java b/src/main/java/edu/ucsd/msjava/msutil/SpectraContainer.java
index 0435d1eb..b0cad8be 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpectraContainer.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpectraContainer.java
@@ -1,6 +1,6 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.parser.SpectrumParser;
+import edu.ucsd.msjava.mgf.SpectrumParser;
import java.io.*;
import java.util.ArrayList;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpectraIterator.java b/src/main/java/edu/ucsd/msjava/msutil/SpectraIterator.java
index 2962bdc9..7def0abe 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpectraIterator.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpectraIterator.java
@@ -1,8 +1,8 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.parser.BufferedLineReader;
-import edu.ucsd.msjava.parser.LineReader;
-import edu.ucsd.msjava.parser.SpectrumParser;
+import edu.ucsd.msjava.mgf.BufferedLineReader;
+import edu.ucsd.msjava.mgf.LineReader;
+import edu.ucsd.msjava.mgf.SpectrumParser;
import java.io.FileNotFoundException;
import java.io.IOException;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpectraMap.java b/src/main/java/edu/ucsd/msjava/msutil/SpectraMap.java
index f82780f1..974c5360 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpectraMap.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpectraMap.java
@@ -1,7 +1,7 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.parser.BufferedRandomAccessLineReader;
-import edu.ucsd.msjava.parser.SpectrumParser;
+import edu.ucsd.msjava.mgf.BufferedRandomAccessLineReader;
+import edu.ucsd.msjava.mgf.SpectrumParser;
import java.util.*;
import java.util.Map.Entry;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/UserParam.java b/src/main/java/edu/ucsd/msjava/msutil/UserParam.java
index 97903fbc..f286fc48 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/UserParam.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/UserParam.java
@@ -1,6 +1,6 @@
package edu.ucsd.msjava.msutil;
-import edu.ucsd.msjava.parser.BufferedLineReader;
+import edu.ucsd.msjava.mgf.BufferedLineReader;
import java.io.FileNotFoundException;
import java.io.IOException;
diff --git a/src/main/java/edu/ucsd/msjava/mzml/StaxMzMLSpectraIterator.java b/src/main/java/edu/ucsd/msjava/mzml/StaxMzMLSpectraIterator.java
index 22dbe1e5..d92ecfb3 100644
--- a/src/main/java/edu/ucsd/msjava/mzml/StaxMzMLSpectraIterator.java
+++ b/src/main/java/edu/ucsd/msjava/mzml/StaxMzMLSpectraIterator.java
@@ -1,7 +1,7 @@
package edu.ucsd.msjava.mzml;
import edu.ucsd.msjava.msutil.Spectrum;
-import edu.ucsd.msjava.parser.SpectrumParser;
+import edu.ucsd.msjava.mgf.SpectrumParser;
import java.util.Iterator;
import java.util.NoSuchElementException;
From 85d0afe24908af05670a8ea7caea2b132bdb4c31 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 06:59:14 +0100
Subject: [PATCH 22/34] fix(cli): CustomAA= config-file crash + 3 picocli
polish issues
The Phase 4c review surfaced four issues that all bottomed out in
small details of the new typed-options path. Bundling the fixes plus
a regression test:
1. **CustomAA= crash (critical, was code review issue #1).**
AminoAcidSet.getAminoAcidSetFromModEntries was prepending
"CustomAA=" to each entry before handing it to parseConfigEntry,
but parseConfigEntry only strips the "nummods=" prefix -- every
other line is split on commas and modInfo[0] is parsed as a mass
or empirical formula. With the prefix attached, modInfo[0] became
"CustomAA=C3H5NO" which fails Double.parseDouble + Composition.getMass
and triggers the System.exit(-1) in the caller. Any -conf file with
a CustomAA= line crashed the process. MSGFPlusOptions.applyConfigEntry
already strips the "Key=" prefix when populating opts.customAAs;
the fix is to drop the literal at AminoAcidSet:826 so the bare
value reaches parseConfigEntry, matching how staticMods/dynamicMods
are passed at line 831. New regression test
MSGFPlusOptionsConfigFileTest.configFileWithCustomAAParsesWithoutCrashing
pins this with a tiny synthetic config file.
2. **-decoy default doc.** @Option description now says "Default: XXX"
(the actual value returned by effectiveDecoyPrefix and the same
constant the legacy code used via MSGFPlus.DEFAULT_DECOY_PROTEIN_PREFIX).
The "Default: DECOY_" string was wrong since Phase 1a.
3. **Single-file spectrum-format null check.** The single-file branch
in SearchParams.parse used to silently store a null SpecFileFormat
into DBSearchIOFiles when the user supplied -s file.bogus, which
later NPE'd at MSGFPlus:305 (specFormat.getPSIName()). It now
short-circuits with the same message the directory branch's
isSupportedSpectrumFormat filter implies:
"Spectrum file extension does not match a supported format
(*.mzML, *.mgf): " followed by the offending file name.
4. **Unrecognized config-key URL hint.** The legacy parseConfigParamFile
tracked an invalid-parameter counter and, after closing the file,
printed the example-params URL hint exactly once if the count was
non-zero. MSGFPlusOptions.applyConfigFile now restores that
behaviour with a private unrecognizedConfigEntries counter
incremented inside the default branch of applyConfigEntry, plus
the same end-of-file hint.
Scoped tests pass (68 tests, 0 failures, 0 errors).
---
.../edu/ucsd/msjava/cli/MSGFPlusOptions.java | 14 ++++-
.../ucsd/msjava/msdbsearch/SearchParams.java | 3 +
.../edu/ucsd/msjava/msutil/AminoAcidSet.java | 5 +-
.../cli/MSGFPlusOptionsConfigFileTest.java | 60 +++++++++++++++++++
4 files changed, 80 insertions(+), 2 deletions(-)
create mode 100644 src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
index e6416223..7a877754 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
@@ -60,7 +60,7 @@ public final class MSGFPlusOptions {
public File outputFile;
@Option(names = "-decoy", paramLabel = "Prefix",
- description = "Decoy protein prefix; Default: DECOY_")
+ description = "Decoy protein prefix; Default: XXX")
public String decoyPrefix;
// ---------- precursor mass tolerance ----------
@@ -331,6 +331,7 @@ public Protocol effectiveProtocol() {
* @return null on success, error string otherwise.
*/
public String applyConfigFile(File file) {
+ int unrecognizedCount = 0;
try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
String line;
int lineNum = 0;
@@ -343,17 +344,27 @@ public String applyConfigFile(File file) {
String rawKey = trimmed.substring(0, eq).trim();
String value = trimmed.substring(eq + 1).trim();
String key = canonicalConfigKey(rawKey);
+ int before = unrecognizedConfigEntries;
String err = applyConfigEntry(key, value, file.getName());
if (err != null) {
return "Error parsing line " + lineNum + " of " + file.getName() + ": " + err;
}
+ if (unrecognizedConfigEntries > before) unrecognizedCount++;
}
} catch (IOException e) {
return "Error reading config file " + file.getPath() + ": " + e.getMessage();
}
+ if (unrecognizedCount > 0) {
+ System.out.println("Valid parameters are described in the example parameter file at " +
+ "https://github.com/MSGFPlus/msgfplus/blob/master/docs/examples/MSGFPlus_Params.txt");
+ }
return null;
}
+ /** Counter incremented inside {@link #applyConfigEntry} whenever an unknown
+ * config-file key is seen; surfaced via the end-of-file URL hint. */
+ private int unrecognizedConfigEntries;
+
private String applyConfigEntry(String key, String value, String fileName) {
// Repeated entries: collect into lists. "none" is treated as no entry.
if (key.equalsIgnoreCase("DynamicMod")) {
@@ -415,6 +426,7 @@ private String applyConfigEntry(String key, String value, String fileName) {
default:
if (!key.toLowerCase().startsWith("enzymedef")) {
System.out.println("Warning, unrecognized parameter '" + key + "=" + value + "' in config file " + fileName);
+ unrecognizedConfigEntries++;
}
return null;
}
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
index 2c66a799..9897f010 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
@@ -351,6 +351,9 @@ public String parse(MSGFPlusOptions opts) {
if (!specPath.isDirectory()) {
SpecFileFormat specFormat = SpecFileFormat.getSpecFileFormat(specPath.getName());
+ if (!isSupportedSpectrumFormat(specFormat)) {
+ return "Spectrum file extension does not match a supported format (*.mzML, *.mgf): " + specPath.getName();
+ }
File outputFile = opts.outputFile;
if (outputFile == null) {
String outputFilePath = specPath.getPath().substring(0, specPath.getPath().lastIndexOf('.')) + defaultExt;
diff --git a/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java b/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java
index 102961bf..96bda071 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/AminoAcidSet.java
@@ -823,7 +823,10 @@ public static AminoAcidSet getAminoAcidSetFromModEntries(
ModificationMetadata modMetadata = new ModificationMetadata(opts.effectiveMaxNumMods());
for (int i = 0; i < customAAEntries.size(); i++) {
- if (!parseConfigEntry(configName, i + 1, "CustomAA=" + customAAEntries.get(i), mods, customAA, modMetadata)) {
+ // parseConfigEntry expects bare comma-separated mod definitions, not
+ // a "Key=value" line. MSGFPlusOptions.applyConfigEntry already strips
+ // the "CustomAA=" prefix when populating opts.customAAs.
+ if (!parseConfigEntry(configName, i + 1, customAAEntries.get(i), mods, customAA, modMetadata)) {
System.exit(-1);
}
}
diff --git a/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java b/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java
new file mode 100644
index 00000000..c900ff01
--- /dev/null
+++ b/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java
@@ -0,0 +1,60 @@
+package edu.ucsd.msjava.cli;
+
+import edu.ucsd.msjava.msdbsearch.SearchParams;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Regression tests for {@link MSGFPlusOptions#applyConfigFile} and the
+ * downstream {@link SearchParams#parse} path.
+ *
+ * Pins the {@code CustomAA=} crash that was caught in code review: the
+ * legacy hashtable-based config-file reader passed bare values to
+ * {@code AminoAcidSet.parseConfigEntry}, but the modernized adapter
+ * briefly re-prepended {@code "CustomAA="} which {@code parseConfigEntry}
+ * does not strip — every {@code -conf} invocation containing a
+ * {@code CustomAA=} line crashed via {@code System.exit(-1)}.
+ */
+public class MSGFPlusOptionsConfigFileTest {
+
+ @Test
+ public void configFileWithCustomAAParsesWithoutCrashing() throws IOException, URISyntaxException {
+ // Build a minimal config file with the documented CustomAA= form.
+ Path tmpDir = Files.createTempDirectory("msgfplus-customaa-");
+ Path conf = tmpDir.resolve("with_custom_aa.txt");
+ Files.write(conf, ("# Regression for the CustomAA= prefix bug\n"
+ + "CustomAA=C3H5NO, U, custom, U, Selenocysteine\n"
+ + "MinPepLength=7\n").getBytes(StandardCharsets.UTF_8));
+
+ URI specUri = MSGFPlusOptionsConfigFileTest.class.getClassLoader()
+ .getResource("test.mgf").toURI();
+ URI dbUri = MSGFPlusOptionsConfigFileTest.class.getClassLoader()
+ .getResource("Tryp_Pig_Bov.fasta").toURI();
+
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ opts.configFile = conf.toFile();
+ opts.spectrumFile = new File(specUri);
+ opts.databaseFile = new File(dbUri);
+
+ SearchParams params = new SearchParams();
+ String err = params.parse(opts);
+ Assert.assertNull("SearchParams.parse must not crash on a config file with CustomAA= entries: " + err, err);
+
+ // The custom AA list should reach opts.customAAs and be honored downstream.
+ Assert.assertEquals(1, opts.customAAs.size());
+ Assert.assertEquals("config-file MinPepLength=7 should win over the default of 6",
+ 7, opts.effectiveMinPeptideLength());
+
+ // Cleanup.
+ Files.deleteIfExists(conf);
+ Files.deleteIfExists(tmpDir);
+ }
+}
From 8fc6e2b493c0a1741fa5c65f721dfc025afec94d Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 08:08:44 +0100
Subject: [PATCH 23/34] fix(cli): restore -m 4 = UVPD activation method
The Phase 4c MSGFPlusOptions.effectiveActivationMethod() switch
hardcoded indices 0..3 (ASWRITTEN/CID/ETD/HCD) and threw
IllegalArgumentException for index 4. The legacy
addFragMethodParam(ActivationMethod.ASWRITTEN, doNotAddMergeMode=true)
hid the registry's FUSION (slot 4), so the user-facing menu was
0..3 + UVPD at index 4. The Phase 4c rewrite silently dropped UVPD
support; this commit restores it.
- Add case 4: return ActivationMethod.UVPD; to the switch.
- Update the @Option description to enumerate 4=UVPD.
- New unit test (MSGFPlusOptionsActivationMethodTest) pins the
full 0..4 mapping plus the default and the out-of-range guard.
docs/msgfplus.md already documents -m 4 = UVPD; this brings the
code in line with the doc.
---
.../edu/ucsd/msjava/cli/MSGFPlusOptions.java | 10 +++--
.../MSGFPlusOptionsActivationMethodTest.java | 43 +++++++++++++++++++
2 files changed, 50 insertions(+), 3 deletions(-)
create mode 100644 src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsActivationMethodTest.java
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
index 7a877754..b010ab27 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
@@ -105,7 +105,7 @@ public final class MSGFPlusOptions {
public Integer tdaStrategy;
@Option(names = "-m", paramLabel = "ID",
- description = "Fragmentation method ID: 0=as written/CID (Default), 1=CID, 2=ETD, 3=HCD")
+ description = "Fragmentation method ID: 0=as written/CID (Default), 1=CID, 2=ETD, 3=HCD, 4=UVPD")
public Integer fragMethodId;
@Option(names = "-inst", paramLabel = "ID",
@@ -285,8 +285,11 @@ public IntRange effectiveSpecIndexRange() {
}
/** Resolves {@code -m} index to {@link ActivationMethod}. MSGFPlus exposes
- * 0=ASWRITTEN, 1=CID, 2=ETD, 3=HCD (FUSION is excluded by
- * {@code addFragMethodParam(..., doNotAddMergeMode=true)}). */
+ * 0=ASWRITTEN, 1=CID, 2=ETD, 3=HCD, 4=UVPD. The registry also defines
+ * FUSION (merge-mode synthetic method) and PQD, but neither is exposed
+ * as a user-selectable index by MSGFPlus -- FUSION was hidden by the
+ * legacy {@code addFragMethodParam(..., doNotAddMergeMode=true)}, which
+ * shifted UVPD from registry slot 5 down to user-facing index 4. */
public ActivationMethod effectiveActivationMethod() {
int idx = fragMethodId != null ? fragMethodId : 0;
switch (idx) {
@@ -294,6 +297,7 @@ public ActivationMethod effectiveActivationMethod() {
case 1: return ActivationMethod.CID;
case 2: return ActivationMethod.ETD;
case 3: return ActivationMethod.HCD;
+ case 4: return ActivationMethod.UVPD;
default: throw new IllegalArgumentException("invalid -m index: " + idx);
}
}
diff --git a/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsActivationMethodTest.java b/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsActivationMethodTest.java
new file mode 100644
index 00000000..6df6723d
--- /dev/null
+++ b/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsActivationMethodTest.java
@@ -0,0 +1,43 @@
+package edu.ucsd.msjava.cli;
+
+import edu.ucsd.msjava.msutil.ActivationMethod;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Pins the {@code -m} ID -> {@link ActivationMethod} mapping. The legacy
+ * dispatch went through the registry order (ASWRITTEN, CID, ETD, HCD, FUSION,
+ * UVPD) with {@code FUSION} hidden by {@code addFragMethodParam(...,
+ * doNotAddMergeMode=true)}, which shifted {@code UVPD} from registry slot 5
+ * to the user-facing index 4. The Phase 4c rewrite originally hardcoded only
+ * 0..3 and silently dropped UVPD; this test guards against regressing it
+ * again.
+ */
+public class MSGFPlusOptionsActivationMethodTest {
+
+ @Test
+ public void defaultIsAsWritten() {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ Assert.assertSame(ActivationMethod.ASWRITTEN, opts.effectiveActivationMethod());
+ }
+
+ @Test
+ public void mapsAllSupportedIndices() {
+ Assert.assertSame(ActivationMethod.ASWRITTEN, withFragMethodId(0).effectiveActivationMethod());
+ Assert.assertSame(ActivationMethod.CID, withFragMethodId(1).effectiveActivationMethod());
+ Assert.assertSame(ActivationMethod.ETD, withFragMethodId(2).effectiveActivationMethod());
+ Assert.assertSame(ActivationMethod.HCD, withFragMethodId(3).effectiveActivationMethod());
+ Assert.assertSame(ActivationMethod.UVPD, withFragMethodId(4).effectiveActivationMethod());
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void rejectsOutOfRangeIndex() {
+ withFragMethodId(5).effectiveActivationMethod();
+ }
+
+ private static MSGFPlusOptions withFragMethodId(int id) {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ opts.fragMethodId = id;
+ return opts;
+ }
+}
From 05e664afb6b4ff8b0686235fb96e08e9799929d8 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 08:09:07 +0100
Subject: [PATCH 24/34] docs: refresh README + module docs after PR #25 cleanup
Remove stale references that no longer match the modernized code:
- README.md: Quick-Start examples now write .pin (default) or
use -outputFormat tsv; the deleted MzIDToTsv conversion step is
gone. The "What is MS-GF+?" paragraph and "What is different in
this fork?" bullets describe the actual current state (mzIdentML
output removed; spectrum input narrowed to mzML + mgf only;
picocli-based CLI). The required-input table now shows
*.mzML / *.mgf only and the -o default is [input].pin.
- docs/msgfplus.md: -s synopsis trimmed to "*.mzML or *.mgf" in
three places; -allowDenseCentroidedPeaks no longer mentions
mzXML; the duplicated "mzML, mzXML, mzML" typo is fixed.
- docs/examples/MSGFPlus_Params.txt: SpectrumFile comment trimmed
to *.mzML / *.mgf.
- docs/readme.md: Input/Output summary now matches the fork's
actual format support; obsolete ms-gfdb.md link removed (the
doc and entry point were deleted in 5a2ec4e).
- docs/troubleshooting.md: FASTA-split workaround now describes
concatenating .pin / .tsv outputs (mzIdentML and MzidMerger no
longer apply). The OpenMS TOPPAS workaround now feeds the .pin
via PercolatorAdapter instead of importing a non-existent .mzid.
- src/test/resources/MSGFDB_Param.txt: removed showDecoy=1 and
uniformAAProb=auto (both ParamNameEnum entries were dropped in
5a2ec4e and now produce "unrecognized parameter" warnings on
every test run). Normalized ParentMassTolerance ->
PrecursorMassTolerance and IsotopeError -> IsotopeErrorRange to
use the canonical keys; the canonicalConfigKey aliases keep the
old names working but the test fixture should be self-documenting.
No code changes; the existing scoped test sweep continues to pass.
---
README.md | 29 ++++++++++++++++-------------
docs/examples/MSGFPlus_Params.txt | 2 +-
docs/msgfplus.md | 14 +++++++-------
docs/readme.md | 5 ++---
docs/troubleshooting.md | 4 ++--
src/test/resources/MSGFDB_Param.txt | 19 +++----------------
6 files changed, 31 insertions(+), 42 deletions(-)
diff --git a/README.md b/README.md
index 14d0e2fa..ac2748d6 100644
--- a/README.md
+++ b/README.md
@@ -12,8 +12,8 @@
MS-GF+ (aka MSGF+ or MSGFPlus) performs peptide identification by scoring
MS/MS spectra against peptides derived from a protein sequence database.
-It supports the HUPO PSI standard input file (mzML) and additional legacy spectrum inputs, and saves results in
-the mzIdentML format, though results can easily be transformed to TSV.
+It supports the HUPO PSI standard input file (mzML) plus MGF, and writes
+Percolator `.pin` (default) or TSV output.
ProteomeXchange supports Complete data submissions using MS-GF+ search results.
MS-GF+ is developed by Sangtae Kim and the PNNL Proteomics team at the
@@ -22,10 +22,11 @@ Center for Computational Mass Spectrometry, University of California, San Diego.
## What is different in this fork?
- **Streaming mzML parser** -- replaces the in-memory preload with a single-pass StAX parser, significantly reducing memory usage for large files
-- **Primary maintained formats: mzML and MGF** -- mzXML is not available in this fork
+- **Spectrum input narrowed to mzML and MGF** -- mzXML, MS2, PKL, and `_dta.txt` are not supported in this fork
+- **mzIdentML output removed** -- output is Percolator `.pin` (default) or TSV; feed `.pin` straight into Percolator for rescoring
+- **Picocli-based CLI** -- declarative typed flags with auto-generated `-h/--help`
- **Java 17 minimum** -- updated from Java 8
- **CI/CD** -- GitHub Actions for automated testing and releases
-- **Direct TSV output** -- optional TSV output alongside mzIdentML
## Requirements
@@ -39,13 +40,13 @@ Download the latest release from the [Releases page](https://github.com/bigbio/m
## Quick Start
```bash
-# Basic search
+# Basic search (writes results.pin in Percolator format)
java -Xmx4G -jar MSGFPlus.jar \
-s spectra.mzML \
-d database.fasta \
- -o results.mzid
+ -o results.pin
-# TMT search with target-decoy analysis
+# TMT search with target-decoy analysis, Percolator-ready output
java -Xmx8G -jar MSGFPlus.jar \
-s spectra.mzML \
-d database.fasta \
@@ -56,11 +57,13 @@ java -Xmx8G -jar MSGFPlus.jar \
-e 1 \
-protocol 4 \
-mod mods.txt \
- -o results.mzid
+ -o results.pin
-# Convert mzid output to TSV
-java -cp MSGFPlus.jar edu.ucsd.msjava.ui.MzIDToTsv \
- -i results.mzid \
+# Direct TSV output (skip Percolator)
+java -Xmx4G -jar MSGFPlus.jar \
+ -s spectra.mzML \
+ -d database.fasta \
+ -outputFormat tsv \
-o results.tsv
```
@@ -70,14 +73,14 @@ java -cp MSGFPlus.jar edu.ucsd.msjava.ui.MzIDToTsv \
| Flag | Name | Description |
|------|------|-------------|
-| `-s` | SpectrumFile | Input spectrum file (`*.mzML`, `*.mgf`, `*.ms2`, `*.pkl`, `*_dta.txt`). Spectra should be centroided. |
+| `-s` | SpectrumFile | Input spectrum file (`*.mzML`, `*.mgf`). Spectra should be centroided. |
| `-d` | DatabaseFile | Protein sequence database (`*.fasta`, `*.fa`, `*.faa`). |
### Core Search Parameters
| Flag | Name | Default | Description |
|------|------|---------|-------------|
-| `-o` | OutputFile | `[input].mzid` | Output file path (`.mzid` format). |
+| `-o` | OutputFile | `[input].pin` | Output file path (`.pin` Percolator format, default; `.tsv` if `-outputFormat tsv`). |
| `-conf` | ConfigurationFile | — | Configuration file; command-line options override config file settings. |
| `-t` | PrecursorMassTolerance | `20ppm` | Precursor mass tolerance (e.g., `2.5Da`, `20ppm`, or `0.5Da,2.5Da` for asymmetric). |
| `-ti` | IsotopeErrorRange | `0,1` | Range of allowed isotope peak errors (e.g., `-1,2`). |
diff --git a/docs/examples/MSGFPlus_Params.txt b/docs/examples/MSGFPlus_Params.txt
index c1ef196e..8a7e0d16 100644
--- a/docs/examples/MSGFPlus_Params.txt
+++ b/docs/examples/MSGFPlus_Params.txt
@@ -1,5 +1,5 @@
# SpectrumFile
-# *.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt
+# *.mzML or *.mgf
# Spectra should be centroided (see below for MSConvert example). Profile spectra will be ignored.
# Use of -s at the command line will override this filename
#SpectrumFile=InstrumentFile.mzML
diff --git a/docs/msgfplus.md b/docs/msgfplus.md
index 19117e5b..d3a8b3aa 100644
--- a/docs/msgfplus.md
+++ b/docs/msgfplus.md
@@ -10,7 +10,7 @@ Usage: java -Xmx3500M -jar MSGFPlus.jar
An example parameter file is at https://github.com/MSGFPlus/msgfplus/blob/master/docs/examples/MSGFPlus_Params.txt
Additional parameter files are at https://github.com/MSGFPlus/msgfplus/tree/master/docs/parameterfiles
-[-s SpectrumFile] (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
+[-s SpectrumFile] (*.mzML or *.mgf)
Spectra should be centroided (see below for MSConvert example). Profile spectra will be ignored.
[-d DatabaseFile] (*.fasta or *.fa or *.faa)
@@ -123,9 +123,9 @@ Usage: java -Xmx3500M -jar MSGFPlus.jar
[-numMods Count] (Maximum number of dynamic (variable) modifications per peptide; Default: 3)
-[-allowDenseCentroidedPeaks 0/1] (Default: 0 (disabled); 1: (for mzML/mzXML input only) allows inclusion of spectra with high-density centroid data in the search)
- MS-GF+ checks the distance between consecutive peaks in the spectrum, and if the median distance is less than 50 ppm, they are considered profile spectra regardless of the value provided in mzML and mzXML files.
- This parameter allows overriding this check when the mzML/mzXML file says the spectrum is centroided.
+[-allowDenseCentroidedPeaks 0/1] (Default: 0 (disabled); 1: (for mzML input only) allows inclusion of spectra with high-density centroid data in the search)
+ MS-GF+ checks the distance between consecutive peaks in the spectrum, and if the median distance is less than 50 ppm, they are considered profile spectra regardless of the value provided in the mzML file.
+ This parameter allows overriding this check when the mzML file says the spectrum is centroided.
```
@@ -146,10 +146,10 @@ Example command (low-precision spectra):
### Parameters:
-- **-s SpectrumFile** (.mzML\*, \*.mzXML, \*.mgf, \*.ms2, \*.pkl or \*\_dta.txt) - Required
+- **-s SpectrumFile** (\*.mzML or \*.mgf) - Required
- - Spectrum file name. Currently, MS-GF+ supports the following file formats: mzML, mzXML, mzML, mgf, ms2, pkl and \_dta.txt.
- - We recommend to use mzML, whenever possible.
+ - Spectrum file name. This fork supports two spectrum file formats: `mzML` and `mgf`. Legacy formats (`mzXML`, `ms2`, `pkl`, `_dta.txt`) are not supported.
+ - We recommend `mzML` whenever possible.
- For Thermo .raw files, obtain a centroided .mzML using MSConvert, which is part of [ProteoWizard](http://proteowizard.sourceforge.net/).
`MSConvert.exe --mzML --32 --filter "peakPicking true 1-" DatasetName.raw`
diff --git a/docs/readme.md b/docs/readme.md
index 3f58ab68..14fc1ecc 100644
--- a/docs/readme.md
+++ b/docs/readme.md
@@ -10,8 +10,8 @@ Static HTML under `docs/` was replaced with these Markdown pages so they read we
### Summary
- MS-GF+ is an MS/MS database search tool that is sensitive (it identifies more peptides than other database search tools and as many peptides as spectral library search tools) and universal (works well for diverse types of spectra, different configurations of MS instruments and different experimental protocols).
-- Input: HUPO PSI standard mzML (also mzXML / MGF / MS2 / PKL).
-- Output: Percolator `.pin` (default, for rescoring) or TSV. **mzIdentML (`.mzid`) output has been removed as of the next release** — MS-GF+ now feeds downstream Percolator pipelines directly via `.pin`. See [Changelog](changelog.md) for migration notes.
+- Input: HUPO PSI standard mzML and MGF only (mzXML, MS2, PKL, and `_dta.txt` are not supported in this fork).
+- Output: Percolator `.pin` (default, for rescoring) or TSV. mzIdentML (`.mzid`) output has been removed — MS-GF+ now feeds downstream Percolator pipelines directly via `.pin`. See [Changelog](changelog.md) for migration notes.
### Usage and help
@@ -21,7 +21,6 @@ Static HTML under `docs/` was replaced with these Markdown pages so they read we
- [Suffix array builder (BuildSA)](buildsa.md)
- [Isobaric labelling: TMT / TMTpro / iTRAQ recipes](isobariclabeling.md)
- [Troubleshooting & common errors](troubleshooting.md)
-- [MS-GFDB (obsolete)](ms-gfdb.md)
### Publications
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index fa6ce803..1d499daa 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -50,7 +50,7 @@ MS-GF+ currently uses `int`-indexed suffix-array and byte-array structures for t
Affected workflows: metaproteomics, proteogenomics, antibody-repertoire searches, and pan-microbial databases.
-**Workaround today** — split the FASTA into chunks ≤ 250 MB, run one MS-GF+ search per chunk, and merge the resulting mzIdentML files. [MzidMerger](https://github.com/PNNL-Comp-Mass-Spec/Mzid-Merger) is the standard tool for the merge step.
+**Workaround today** — split the FASTA into chunks ≤ 250 MB, run one MS-GF+ search per chunk, and concatenate the resulting `.pin` (or `.tsv`) files. For `.pin` outputs the header line repeats per chunk; drop duplicate header rows after the first, then feed the merged file to Percolator.
**Planned fix** — 64-bit indexed FASTA storage is tracked as Priority 1 in the `bigbio/msgfplus` performance roadmap. See the investigation note in `.claude/investigations/` (not shipped).
@@ -107,7 +107,7 @@ Related issue: [#52](https://github.com/MSGFPlus/msgfplus/issues/52).
Reported in [OpenMS #1764](https://github.com/OpenMS/OpenMS/issues/1764). The command line works; TOPPAS fails because of how it passes environment and quoted arguments.
-**Workaround** — run MS-GF+ directly from the command line and import the resulting mzIdentML into OpenMS.
+**Workaround** — run MS-GF+ directly from the command line and feed the resulting `.pin` (or `.tsv`) into OpenMS via `PercolatorAdapter` / `MSGFPlusAdapter`.
---
diff --git a/src/test/resources/MSGFDB_Param.txt b/src/test/resources/MSGFDB_Param.txt
index c8d71699..8db2b7b9 100644
--- a/src/test/resources/MSGFDB_Param.txt
+++ b/src/test/resources/MSGFDB_Param.txt
@@ -1,8 +1,7 @@
-#Parent mass tolerance
+#Precursor mass tolerance
# Examples: 2.5Da or 30ppm
# Use comma to set asymmetric values, for example "0.5Da,2.5Da" will set 0.5Da to the left (expMass<theoMass) and 2.5Da to the right (expMass>theoMass)
-#PMTolerance=20ppm
-ParentMassTolerance=20ppm
+PrecursorMassTolerance=20ppm
#Max Number of Modifications per peptide
# If this value is large, the search will be slow
@@ -45,7 +44,7 @@ EnzymeID=1
# Ignored if the parent mass tolerance is > 0.5Da or 500ppm
# The combination of -t and -ti determins the precursor mass tolerance.
# e.g. "-t 20ppm -ti -1,2" tests abs(exp-calc-n*1.00335Da)<20ppm for n=-1, 0, 1, 2.
-IsotopeError=-1,2
+IsotopeErrorRange=-1,2
#Number of tryptic termini
# The number of peptide termini that must have been cleaved by the enzyme (default 1)
@@ -57,11 +56,6 @@ NTT=2
# 1 means search decoy database to compute FDR (source FASTA file must be forward-only proteins)
TDA=1
-#Include decoy peptides (only applicable when TDA=1)
-# 0 means to not include decoy (reverse) peptides
-# 1 means to include decoy PSMs after all target (forward) PSMs
-showDecoy=1
-
#Number of Threads (by default, uses all available cores)
#In DMS, a settings file entry for MSGFDBThreads will override this value
NumThreads=All
@@ -82,13 +76,6 @@ maxCharge=5
#If this value is greater than 1 then the FDR values computed by MS-GF+ will be skewed by high-scoring 2nd and 3rd hits
NumMatchesPerSpec=2
-#Amino Acid Frequencies
-# If 0, compute amino acid frequencies from the input database and use them as amino acid probabilities
-# If 1, use uniform amino acid probability (preferable when the database size is small)
-# If auto, then Analysis Manager will auto-set this to 0 if the .Fasta file is more than 20 KB in size
-uniformAAProb=auto
-
-
#Amino Acid Modification Examples
# Specify static modifications using one or more StaticMod= entries
# Specify dynamic modifications using one or more DynamicMod= entries
From 7a19f833356f8f7110b45ca2f0ca6785748c9cd2 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 09:55:41 +0100
Subject: [PATCH 25/34] fix(cli): three Phase 4c regressions + polish on
MSGFPlusOptions
A user code review of the unpushed branch tip surfaced three
behaviour regressions, all dating back to the Phase 4c ParamManager
retirement (commit 03f32c1), plus a few small cleanups.
P1 -- config-file keys are now matched case-insensitively, restoring
the legacy ParamManager.parseConfigParamFile semantics. The Phase 4c
switch was exact-case so test fixtures using lowercase first-letter
keys (e.g. "minCharge=2", "maxCharge=5" in MSGFDB_Param.txt) were
silently dropped to defaults instead of overriding them. The fix
lowercases canonicalConfigKey output and makes every applyConfigEntry
case label lowercase. New regression test
configFileKeysAreMatchedCaseInsensitively pins the contract with a
mix of canonical, lowercased-first-letter, and ALLCAPS forms.
P2 -- invalid enum-like CLI indices (-m 99, -inst 99, -e 99,
-protocol 99) and out-of-range numerics now produce a clean
user-facing error from SearchParams.parse instead of an
IllegalArgumentException stack trace from the resolver. validate()
is now invoked in place of validateRequired() and runs the bounds
checks up-front.
P2 -- restored the legacy IntParameter.minValue/maxValue range
checks for: -thread, -tasks, -minSpectraPerThread, -minLength,
-maxLength, -minCharge, -maxCharge, -n, -ntt, -tda, -verbose,
-addFeatures, -allowDenseCentroidedPeaks, -edgeScore,
-ignoreMetCleavage, -maxMissedCleavages, -numMods, -ccm, -u, -m,
-inst, -e, -protocol, plus the hidden flags. New unit test
validateRejectsOutOfRangeFlags pins a representative set.
Polish:
- Drop the dead Phase 1 / MSGFPlusOptionsAdapter / ParamManager
rollout narrative from the MSGFPlusOptions class header (both
the adapter class and ParamManager were deleted in earlier
commits; the comment was stale).
- Collapse the unrecognizedConfigEntries field + local probe
counter in applyConfigFile into a single counter reset at start
and read at end -- one piece of state, simpler control flow.
- Strip the CRLF (\r before \n) on docs/examples/MSGFPlus_Params.txt
and src/test/resources/MSGFDB_Param.txt, which git diff --check
was flagging as trailing whitespace. The rest of the codebase is
LF-only.
Verified:
- mvn -B -o test on the scoped sweep (77 tests, 0 failures, 0 errors,
3 skipped).
- SearchParamsTest no longer warns "unrecognized parameter
'minCharge=2'" / "'maxCharge=5'" when loading MSGFDB_Param.txt.
- git diff --check on this commit is clean.
---
docs/examples/MSGFPlus_Params.txt | 278 +++++++++---------
.../edu/ucsd/msjava/cli/MSGFPlusOptions.java | 247 ++++++++++------
.../ucsd/msjava/msdbsearch/SearchParams.java | 7 +-
.../cli/MSGFPlusOptionsConfigFileTest.java | 64 ++++
src/test/resources/MSGFDB_Param.txt | 194 ++++++------
5 files changed, 460 insertions(+), 330 deletions(-)
diff --git a/docs/examples/MSGFPlus_Params.txt b/docs/examples/MSGFPlus_Params.txt
index 8a7e0d16..66282805 100644
--- a/docs/examples/MSGFPlus_Params.txt
+++ b/docs/examples/MSGFPlus_Params.txt
@@ -1,139 +1,139 @@
-# SpectrumFile
-# *.mzML or *.mgf
-# Spectra should be centroided (see below for MSConvert example). Profile spectra will be ignored.
-# Use of -s at the command line will override this filename
-#SpectrumFile=InstrumentFile.mzML
-
-# FASTA file
-# "*.fasta or *.fa or *.faa
-# Use of -d at the command line will override this filename
-#DatabaseFile=Proteins.fasta
-
-# Prefix for decoy proteins in the FASTA file
-#DecoyPrefix=XXX
-
-# Precursor mass tolerance
-# Examples: 2.5Da or 30ppm
-# Use comma to set asymmetric values, for example "0.5Da,2.5Da" will set 0.5Da to the left (expMass<theoMass) and 2.5Da to the right (expMass>theoMass)
-PrecursorMassTolerance=20ppm
-
-# Max Number of Dynamic (Variable) Modifications per peptide
-# Default: 3
-# If this value is large, the search will be slow
-NumMods=3
-
-# Modifications (see below for examples)
-StaticMod=C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
-StaticMod=229.1629, *, fix, N-term, TMT6plex
-StaticMod=229.1629, K, fix, any, TMT6plex
-
-DynamicMod=O1, M, opt, any, Oxidation # Oxidized methionine
-DynamicMod=-187.152366, K, opt, any, AcNoTMT # Residue tagged by MSGF+ with static TMT6, but is actually acetylated and does not have TMT
-
-# Custom AA specification
-#CustomAA=C3H5NO, U, custom, U, Selenocysteine # Custom amino acids can only have C, H, N, O, and S
-#CustomAA=C6H11NO, X, custom, X, Leu_Ile # Leucine or Isoleucine
-
-# Fragmentation Method
-# 0 means as written in the spectrum or CID if no info (Default)
-# 1 means CID
-# 2 means ETD
-# 3 means HCD
-FragmentationMethodID=0
-
-# Instrument ID
-# 0 means Low-res LCQ/LTQ (Default for CID and ETD); use InstrumentID=0 if analyzing a dataset with low-res CID and high-res HCD spectra
-# 1 means High-res LTQ (Default for HCD; also appropriate for high res CID); use InstrumentID=1 for Orbitrap, Lumos, and QEHFX instruments
-# 2 means TOF
-# 3 means Q-Exactive
-InstrumentID=1
-
-# Enzyme ID
-# 0 means unspecific cleavage (cleave after any residue)
-# 1 means Trypsin (Default); optionally use this along with NTT=0 for a no-enzyme-specificity search of a tryptically digested sample
-# 2: Chymotrypsin, 3: Lys-C, 4: Lys-N, 5: Glu-C, 6: Arg-C, 7: Asp-N, 8: alphaLP, 9: No Cleavage (for peptidomics), 10: TrypPlusC (cleave after K, R, or C)
-EnzymeID=1
-
-# Isotope error range
-# Takes into account of the error introduced by choosing non-monoisotopic peak for fragmentation.
-# Useful for accurate precursor ion masses
-# Ignored if the parent mass tolerance is > 0.5Da or 500ppm
-# The combination of -t and -ti determins the precursor mass tolerance.
-# e.g. "-t 20ppm -ti -1,2" tests abs(exp-calc-n*1.00335Da)<20ppm for n=-1, 0, 1, 2.
-IsotopeErrorRange=-1,2
-
-# Number of tolerable termini
-# The number of peptide termini that must have been cleaved by the enzyme (default 1)
-# For trypsin, 2 means fully tryptic only, 1 means partially tryptic, and 0 means no-enzyme search
-NTT=2
-
-# Control N-terminal methionine cleavage
-# 0 means to consider protein N-term Met cleavage (Default)
-# 1 means to ignore protein N-term Met cleavage
-IgnoreMetCleavage=0
-
-# Target/Decoy search mode
-# 0 means don't search decoy database (default)
-# 1 means search decoy database to compute FDR (source FASTA file must be forward-only proteins)
-TDA=1
-
-# Number of concurrent threads to be executed
-# Default: Number of available cores
-# To use three threads use NumThreads=3
-NumThreads=All
-
-# Minimum peptide length to consider
-# Default: 6
-MinPepLength=6
-
-# Maximum peptide length to consider
-# Default: 40
-MaxPepLength=50
-
-# Minimum precursor charge to consider (if not specified in the spectrum file)
-# Default: 2
-MinCharge=2
-
-# Maximum precursor charge to consider (if not specified in the spectrum file)
-# Default: 3
-MaxCharge=5
-
-# Number of matches per spectrum to be reported
-# If this value is greater than 1, the FDR values computed by MS-GF+ will be skewed by high-scoring 2nd and 3rd hits
-NumMatchesPerSpec=1
-
-# Mass of charge carrier
-# Default: mass of proton
-#ChargeCarrierMass=1.00727649
-
-# Maximum missed cleavages
-# Exclude peptides with more than this number of missed cleavages from the search, Default: -1 (no limit)
-#MaxMissedCleavages=-1
-
-# Minimum number of peaks per spectrum, Default:
-# Default: 10
-#MinNumPeaksPerSpectrum=10
-
-# Number of isoforms to consider per peptide
-# Default: 128
-#NumIsoforms=128
-
-# Amino Acid Modification Examples
-# Specify static modifications using one or more StaticMod= entries
-# Specify dynamic modifications using one or more DynamicMod= entries
-# Modification format is:
-# Mass or CompositionString, Residues, ModType, Position, Name (all five fields are required).
-# CompositionString can only contain a limited set of elements, primarily C H N O S or P
-#
-# Examples:
-# C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
-# O1, M, opt, any, Oxidation # Oxidation M
-# 15.994915, M, opt, any, Oxidation # Oxidation M (mass is used instead of CompositionString)
-# H-1N-1O1, NQ, opt, any, Deamidated # Negative numbers are allowed.
-# CH2, K, opt, any, Methyl # Methylation K
-# C2H2O1, K, opt, any, Acetyl # Acetylation K
-# HO3P, STY,opt, any, Phospho # Phosphorylation STY
-# C2H3NO, *, opt, N-term, Carbamidomethyl # Variable Carbamidomethyl N-term
-# H-2O-1, E, opt, N-term, Glu->pyro-Glu # Pyro-glu from E
-# H-3N-1, Q, opt, N-term, Gln->pyro-Glu # Pyro-glu from Q
-# C2H2O, *, opt, Prot-N-term, Acetyl # Acetylation Protein N-term
+# SpectrumFile
+# *.mzML or *.mgf
+# Spectra should be centroided (see below for MSConvert example). Profile spectra will be ignored.
+# Use of -s at the command line will override this filename
+#SpectrumFile=InstrumentFile.mzML
+
+# FASTA file
+# *.fasta or *.fa or *.faa
+# Use of -d at the command line will override this filename
+#DatabaseFile=Proteins.fasta
+
+# Prefix for decoy proteins in the FASTA file
+#DecoyPrefix=XXX
+
+# Precursor mass tolerance
+# Examples: 2.5Da or 30ppm
+# Use comma to set asymmetric values, for example "0.5Da,2.5Da" will set 0.5Da to the left (expMass<theoMass) and 2.5Da to the right (expMass>theoMass)
+PrecursorMassTolerance=20ppm
+
+# Max Number of Dynamic (Variable) Modifications per peptide
+# Default: 3
+# If this value is large, the search will be slow
+NumMods=3
+
+# Modifications (see below for examples)
+StaticMod=C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
+StaticMod=229.1629, *, fix, N-term, TMT6plex
+StaticMod=229.1629, K, fix, any, TMT6plex
+
+DynamicMod=O1, M, opt, any, Oxidation # Oxidized methionine
+DynamicMod=-187.152366, K, opt, any, AcNoTMT # Residue tagged by MSGF+ with static TMT6, but is actually acetylated and does not have TMT
+
+# Custom AA specification
+#CustomAA=C3H5NO, U, custom, U, Selenocysteine # Custom amino acids can only have C, H, N, O, and S
+#CustomAA=C6H11NO, X, custom, X, Leu_Ile # Leucine or Isoleucine
+
+# Fragmentation Method
+# 0 means as written in the spectrum or CID if no info (Default)
+# 1 means CID
+# 2 means ETD
+# 3 means HCD
+FragmentationMethodID=0
+
+# Instrument ID
+# 0 means Low-res LCQ/LTQ (Default for CID and ETD); use InstrumentID=0 if analyzing a dataset with low-res CID and high-res HCD spectra
+# 1 means High-res LTQ (Default for HCD; also appropriate for high res CID); use InstrumentID=1 for Orbitrap, Lumos, and QEHFX instruments
+# 2 means TOF
+# 3 means Q-Exactive
+InstrumentID=1
+
+# Enzyme ID
+# 0 means unspecific cleavage (cleave after any residue)
+# 1 means Trypsin (Default); optionally use this along with NTT=0 for a no-enzyme-specificity search of a tryptically digested sample
+# 2: Chymotrypsin, 3: Lys-C, 4: Lys-N, 5: Glu-C, 6: Arg-C, 7: Asp-N, 8: alphaLP, 9: No Cleavage (for peptidomics), 10: TrypPlusC (cleave after K, R, or C)
+EnzymeID=1
+
+# Isotope error range
+# Takes into account the error introduced by choosing a non-monoisotopic peak for fragmentation.
+# Useful for accurate precursor ion masses
+# Ignored if the parent mass tolerance is > 0.5Da or 500ppm
+# The combination of -t and -ti determines the precursor mass tolerance.
+# e.g. "-t 20ppm -ti -1,2" tests abs(exp-calc-n*1.00335Da)<20ppm for n=-1, 0, 1, 2.
+IsotopeErrorRange=-1,2
+
+# Number of tolerable termini
+# The number of peptide termini that must have been cleaved by the enzyme (default 1)
+# For trypsin, 2 means fully tryptic only, 1 means partially tryptic, and 0 means no-enzyme search
+NTT=2
+
+# Control N-terminal methionine cleavage
+# 0 means to consider protein N-term Met cleavage (Default)
+# 1 means to ignore protein N-term Met cleavage
+IgnoreMetCleavage=0
+
+# Target/Decoy search mode
+# 0 means don't search decoy database (default)
+# 1 means search decoy database to compute FDR (source FASTA file must be forward-only proteins)
+TDA=1
+
+# Number of concurrent threads to be executed
+# Default: Number of available cores
+# To use three threads use NumThreads=3
+NumThreads=All
+
+# Minimum peptide length to consider
+# Default: 6
+MinPepLength=6
+
+# Maximum peptide length to consider
+# Default: 40
+MaxPepLength=50
+
+# Minimum precursor charge to consider (if not specified in the spectrum file)
+# Default: 2
+MinCharge=2
+
+# Maximum precursor charge to consider (if not specified in the spectrum file)
+# Default: 3
+MaxCharge=5
+
+# Number of matches per spectrum to be reported
+# If this value is greater than 1, the FDR values computed by MS-GF+ will be skewed by high-scoring 2nd and 3rd hits
+NumMatchesPerSpec=1
+
+# Mass of charge carrier
+# Default: mass of proton
+#ChargeCarrierMass=1.00727649
+
+# Maximum missed cleavages
+# Exclude peptides with more than this number of missed cleavages from the search, Default: -1 (no limit)
+#MaxMissedCleavages=-1
+
+# Minimum number of peaks per spectrum
+# Default: 10
+#MinNumPeaksPerSpectrum=10
+
+# Number of isoforms to consider per peptide
+# Default: 128
+#NumIsoforms=128
+
+# Amino Acid Modification Examples
+# Specify static modifications using one or more StaticMod= entries
+# Specify dynamic modifications using one or more DynamicMod= entries
+# Modification format is:
+# Mass or CompositionString, Residues, ModType, Position, Name (all five fields are required).
+# CompositionString can only contain a limited set of elements, primarily C H N O S or P
+#
+# Examples:
+# C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
+# O1, M, opt, any, Oxidation # Oxidation M
+# 15.994915, M, opt, any, Oxidation # Oxidation M (mass is used instead of CompositionString)
+# H-1N-1O1, NQ, opt, any, Deamidated # Negative numbers are allowed.
+# CH2, K, opt, any, Methyl # Methylation K
+# C2H2O1, K, opt, any, Acetyl # Acetylation K
+# HO3P, STY,opt, any, Phospho # Phosphorylation STY
+# C2H3NO, *, opt, N-term, Carbamidomethyl # Variable Carbamidomethyl N-term
+# H-2O-1, E, opt, N-term, Glu->pyro-Glu # Pyro-glu from E
+# H-3N-1, Q, opt, N-term, Gln->pyro-Glu # Pyro-glu from Q
+# C2H2O, *, opt, Prot-N-term, Acetyl # Acetylation Protein N-term
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
index b010ab27..0991add6 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
@@ -15,18 +15,12 @@
import java.util.List;
/**
- * Typed command-line options for MS-GF+. Replaces the imperative
- * {@code addParameter()} calls in {@code ParamManager.addMSGFPlusParams()}
- * with declarative picocli annotations.
- *
- * Phase 1 scope: every flag from {@link ParamNameEnum} that
- * {@code addMSGFPlusParams()} registers, parsed into typed fields.
- * Complex domain types (Tolerance, IntRange, dynamic enums) are
- * captured here as raw strings; the adapter at
- * {@code MSGFPlusOptionsAdapter} round-trips them through the existing
- * {@code params.Parameter#parse(String)} hierarchy to populate a
- * {@code ParamManager} that {@code SearchParams.parse(ParamManager)}
- * can consume unchanged. Phase 3 collapses that round-trip away.
+ * Typed command-line options for MS-GF+. Picocli reads {@code argv} into
+ * the {@code @Option}-annotated fields below; {@link #applyConfigFile}
+ * fills in any field the CLI did not set from a {@code -conf} file
+ * (CLI takes precedence). {@link #validate} enforces required-input
+ * and numeric/enum range invariants. Each {@code effectiveXxx()} accessor
+ * returns the user-supplied value or the legacy default.
*
* Flag inventory: see {@code .claude/plans/parameter-modernization-flag-inventory.md}.
*/
@@ -335,7 +329,7 @@ public Protocol effectiveProtocol() {
* @return null on success, error string otherwise.
*/
public String applyConfigFile(File file) {
- int unrecognizedCount = 0;
+ unrecognizedConfigEntries = 0;
try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
String line;
int lineNum = 0;
@@ -348,17 +342,15 @@ public String applyConfigFile(File file) {
String rawKey = trimmed.substring(0, eq).trim();
String value = trimmed.substring(eq + 1).trim();
String key = canonicalConfigKey(rawKey);
- int before = unrecognizedConfigEntries;
String err = applyConfigEntry(key, value, file.getName());
if (err != null) {
return "Error parsing line " + lineNum + " of " + file.getName() + ": " + err;
}
- if (unrecognizedConfigEntries > before) unrecognizedCount++;
}
} catch (IOException e) {
return "Error reading config file " + file.getPath() + ": " + e.getMessage();
}
- if (unrecognizedCount > 0) {
+ if (unrecognizedConfigEntries > 0) {
System.out.println("Valid parameters are described in the example parameter file at " +
"https://github.com/MSGFPlus/msgfplus/blob/master/docs/examples/MSGFPlus_Params.txt");
}
@@ -366,69 +358,66 @@ public String applyConfigFile(File file) {
}
/** Counter incremented inside {@link #applyConfigEntry} whenever an unknown
- * config-file key is seen; surfaced via the end-of-file URL hint. */
+ * config-file key is seen; surfaced via the end-of-file URL hint and
+ * reset at the start of each {@link #applyConfigFile} call. */
private int unrecognizedConfigEntries;
private String applyConfigEntry(String key, String value, String fileName) {
- // Repeated entries: collect into lists. "none" is treated as no entry.
- if (key.equalsIgnoreCase("DynamicMod")) {
- if (!value.equalsIgnoreCase("none")) dynamicMods.add(value);
- return null;
- }
- if (key.equalsIgnoreCase("StaticMod")) {
- if (!value.equalsIgnoreCase("none")) staticMods.add(value);
- return null;
- }
- if (key.equalsIgnoreCase("CustomAA")) {
- if (!value.equalsIgnoreCase("none")) customAAs.add(value);
- return null;
+ // Config-file matching is case-insensitive. canonicalConfigKey()
+ // already returns lowercase canonical names, so the switch labels
+ // are lowercase too. Repeated mod entries are matched first since
+ // they accumulate rather than overwrite.
+ switch (key) {
+ case "dynamicmod": if (!value.equalsIgnoreCase("none")) dynamicMods.add(value); return null;
+ case "staticmod": if (!value.equalsIgnoreCase("none")) staticMods.add(value); return null;
+ case "customaa": if (!value.equalsIgnoreCase("none")) customAAs.add(value); return null;
+ default: break;
}
// Single-valued entries: only fill in if CLI did not set the field.
try {
switch (key) {
- case "SpectrumFile": if (spectrumFile == null) spectrumFile = new File(value); return null;
- case "DatabaseFile": if (databaseFile == null) databaseFile = new File(value); return null;
- case "OutputFile": if (outputFile == null) outputFile = new File(value); return null;
- case "ModificationFileName":
- case "ModificationFile": if (modificationFile == null) modificationFile = new File(value); return null;
- case "DBIndexDir": if (dbIndexDir == null) dbIndexDir = new File(value); return null;
- case "DecoyPrefix": if (decoyPrefix == null) decoyPrefix = value; return null;
- case "PrecursorMassTolerance": if (precursorTolerance == null) precursorTolerance = PrecursorTolerance.parse(value); return null;
- case "PrecursorMassToleranceUnits":
- if (precursorToleranceUnits == null) precursorToleranceUnits = Integer.parseInt(value); return null;
- case "IsotopeErrorRange": if (isotopeErrorRange == null) isotopeErrorRange = IntRange.parse(value); return null;
- case "FragmentationMethodID": if (fragMethodId == null) fragMethodId = Integer.parseInt(value); return null;
- case "InstrumentID": if (instrumentTypeId == null) instrumentTypeId = Integer.parseInt(value); return null;
- case "EnzymeID": if (enzymeId == null) enzymeId = Integer.parseInt(value); return null;
- case "ProtocolID": if (protocolId == null) protocolId = Integer.parseInt(value); return null;
- case "NTT": if (numTolerableTermini == null) numTolerableTermini = Integer.parseInt(value); return null;
- case "MinPepLength": if (minPeptideLength == null) minPeptideLength = Integer.parseInt(value); return null;
- case "MaxPepLength": if (maxPeptideLength == null) maxPeptideLength = Integer.parseInt(value); return null;
- case "MinCharge": if (minCharge == null) minCharge = Integer.parseInt(value); return null;
- case "MaxCharge": if (maxCharge == null) maxCharge = Integer.parseInt(value); return null;
- case "NumMatchesPerSpec": if (numMatchesPerSpec == null) numMatchesPerSpec = Integer.parseInt(value); return null;
- case "NumThreads": if (numThreads == null) { if (!value.equalsIgnoreCase("all")) numThreads = Integer.parseInt(value); } return null;
- case "NumTasks": if (numTasks == null) numTasks = Integer.parseInt(value); return null;
- case "MinSpectraPerThread": if (minSpectraPerThread == null) minSpectraPerThread = Integer.parseInt(value); return null;
- case "Verbose": if (verbose == null) verbose = Integer.parseInt(value); return null;
- case "TDA": if (tdaStrategy == null) tdaStrategy = Integer.parseInt(value); return null;
- case "AddFeatures": if (addFeatures == null) addFeatures = Integer.parseInt(value); return null;
- case "OutputFormat": if (outputFormat == null) outputFormat = value; return null;
- case "PrecursorCal": if (precursorCalMode == null) precursorCalMode = value; return null;
- case "ChargeCarrierMass": if (chargeCarrierMass == null) chargeCarrierMass = Double.parseDouble(value); return null;
- case "MaxMissedCleavages": if (maxMissedCleavages == null) maxMissedCleavages = Integer.parseInt(value); return null;
- case "NumMods": if (maxNumMods == null) configMaxNumMods = Integer.parseInt(value); return null;
- case "AllowDenseCentroidedPeaks":
- if (allowDenseCentroidedPeaks == null) allowDenseCentroidedPeaks = Integer.parseInt(value); return null;
- case "MSLevel": if (msLevel == null) msLevel = IntRange.parse(value); return null;
- case "SpecIndex": if (specIndexRange == null) specIndexRange = IntRange.parse(value); return null;
- case "EdgeScore": if (edgeScore == null) edgeScore = Integer.parseInt(value); return null;
- case "MinNumPeaksPerSpectrum": if (minNumPeaks == null) minNumPeaks = Integer.parseInt(value); return null;
- case "NumIsoforms": if (numIsoforms == null) numIsoforms = Integer.parseInt(value); return null;
- case "IgnoreMetCleavage": if (ignoreMetCleavage == null) ignoreMetCleavage = Integer.parseInt(value); return null;
- case "MinDeNovoScore": if (minDeNovoScore == null) minDeNovoScore = Integer.parseInt(value); return null;
+ case "spectrumfile": if (spectrumFile == null) spectrumFile = new File(value); return null;
+ case "databasefile": if (databaseFile == null) databaseFile = new File(value); return null;
+ case "outputfile": if (outputFile == null) outputFile = new File(value); return null;
+ case "modificationfilename":
+ case "modificationfile": if (modificationFile == null) modificationFile = new File(value); return null;
+ case "dbindexdir": if (dbIndexDir == null) dbIndexDir = new File(value); return null;
+ case "decoyprefix": if (decoyPrefix == null) decoyPrefix = value; return null;
+ case "precursormasstolerance": if (precursorTolerance == null) precursorTolerance = PrecursorTolerance.parse(value); return null;
+ case "precursormasstoleranceunits":if (precursorToleranceUnits == null) precursorToleranceUnits = Integer.parseInt(value); return null;
+ case "isotopeerrorrange": if (isotopeErrorRange == null) isotopeErrorRange = IntRange.parse(value); return null;
+ case "fragmentationmethodid": if (fragMethodId == null) fragMethodId = Integer.parseInt(value); return null;
+ case "instrumentid": if (instrumentTypeId == null) instrumentTypeId = Integer.parseInt(value); return null;
+ case "enzymeid": if (enzymeId == null) enzymeId = Integer.parseInt(value); return null;
+ case "protocolid": if (protocolId == null) protocolId = Integer.parseInt(value); return null;
+ case "ntt": if (numTolerableTermini == null) numTolerableTermini = Integer.parseInt(value); return null;
+ case "minpeplength": if (minPeptideLength == null) minPeptideLength = Integer.parseInt(value); return null;
+ case "maxpeplength": if (maxPeptideLength == null) maxPeptideLength = Integer.parseInt(value); return null;
+ case "mincharge": if (minCharge == null) minCharge = Integer.parseInt(value); return null;
+ case "maxcharge": if (maxCharge == null) maxCharge = Integer.parseInt(value); return null;
+ case "nummatchesperspec": if (numMatchesPerSpec == null) numMatchesPerSpec = Integer.parseInt(value); return null;
+ case "numthreads": if (numThreads == null && !value.equalsIgnoreCase("all"))
+ numThreads = Integer.parseInt(value); return null;
+ case "numtasks": if (numTasks == null) numTasks = Integer.parseInt(value); return null;
+ case "minspectraperthread": if (minSpectraPerThread == null) minSpectraPerThread = Integer.parseInt(value); return null;
+ case "verbose": if (verbose == null) verbose = Integer.parseInt(value); return null;
+ case "tda": if (tdaStrategy == null) tdaStrategy = Integer.parseInt(value); return null;
+ case "addfeatures": if (addFeatures == null) addFeatures = Integer.parseInt(value); return null;
+ case "outputformat": if (outputFormat == null) outputFormat = value; return null;
+ case "precursorcal": if (precursorCalMode == null) precursorCalMode = value; return null;
+ case "chargecarriermass": if (chargeCarrierMass == null) chargeCarrierMass = Double.parseDouble(value); return null;
+ case "maxmissedcleavages": if (maxMissedCleavages == null) maxMissedCleavages = Integer.parseInt(value); return null;
+ case "nummods": if (maxNumMods == null) configMaxNumMods = Integer.parseInt(value); return null;
+ case "allowdensecentroidedpeaks": if (allowDenseCentroidedPeaks == null) allowDenseCentroidedPeaks = Integer.parseInt(value); return null;
+ case "mslevel": if (msLevel == null) msLevel = IntRange.parse(value); return null;
+ case "specindex": if (specIndexRange == null) specIndexRange = IntRange.parse(value); return null;
+ case "edgescore": if (edgeScore == null) edgeScore = Integer.parseInt(value); return null;
+ case "minnumpeaksperspectrum": if (minNumPeaks == null) minNumPeaks = Integer.parseInt(value); return null;
+ case "numisoforms": if (numIsoforms == null) numIsoforms = Integer.parseInt(value); return null;
+ case "ignoremetcleavage": if (ignoreMetCleavage == null) ignoreMetCleavage = Integer.parseInt(value); return null;
+ case "mindenovoscore": if (minDeNovoScore == null) minDeNovoScore = Integer.parseInt(value); return null;
default:
- if (!key.toLowerCase().startsWith("enzymedef")) {
+ if (!key.startsWith("enzymedef")) {
System.out.println("Warning, unrecognized parameter '" + key + "=" + value + "' in config file " + fileName);
unrecognizedConfigEntries++;
}
@@ -445,35 +434,109 @@ private static String stripComment(String line) {
}
/** Normalize legacy / alternate config-file keys to canonical form.
- * Mirrors the rewrites previously in {@code ParamNameEnum.getParamNameFromLine}. */
+ * Returns lowercase so {@link #applyConfigEntry} can match
+ * case-insensitively (the legacy {@code ParamManager.parseConfigParamFile}
+ * matched names with {@code equalsIgnoreCase}). Mirrors the alias
+ * rewrites previously in {@code ParamNameEnum.getParamNameFromLine}. */
private static String canonicalConfigKey(String key) {
- if (key.equalsIgnoreCase("IsotopeError")) return "IsotopeErrorRange";
- if (key.equalsIgnoreCase("TargetDecoyAnalysis")) return "TDA";
- if (key.equalsIgnoreCase("FragmentationMethod")) return "FragmentationMethodID";
- if (key.equalsIgnoreCase("Instrument")) return "InstrumentID";
- if (key.equalsIgnoreCase("Enzyme")) return "EnzymeID";
- if (key.equalsIgnoreCase("Protocol")) return "ProtocolID";
- if (key.equalsIgnoreCase("NumTolerableTermini")) return "NTT";
- if (key.equalsIgnoreCase("MinNumPeaks")) return "MinNumPeaksPerSpectrum";
- if (key.equalsIgnoreCase("MaxNumMods")) return "NumMods";
- if (key.equalsIgnoreCase("MaxNumModsPerPeptide")) return "NumMods";
- if (key.equalsIgnoreCase("minLength")) return "MinPepLength";
- if (key.equalsIgnoreCase("MinPeptideLength")) return "MinPepLength";
- if (key.equalsIgnoreCase("maxLength")) return "MaxPepLength";
- if (key.equalsIgnoreCase("MaxPeptideLength")) return "MaxPepLength";
- if (key.equalsIgnoreCase("PMTolerance")) return "PrecursorMassTolerance";
- if (key.equalsIgnoreCase("ParentMassTolerance")) return "PrecursorMassTolerance";
- return key;
+ String norm = key.toLowerCase(java.util.Locale.ROOT);
+ switch (norm) {
+ case "isotopeerror": return "isotopeerrorrange";
+ case "targetdecoyanalysis": return "tda";
+ case "fragmentationmethod": return "fragmentationmethodid";
+ case "instrument": return "instrumentid";
+ case "enzyme": return "enzymeid";
+ case "protocol": return "protocolid";
+ case "numtolerabletermini": return "ntt";
+ case "minnumpeaks": return "minnumpeaksperspectrum";
+ case "maxnummods": return "nummods";
+ case "maxnummodsperpeptide": return "nummods";
+ case "minlength": return "minpeplength";
+ case "minpeptidelength": return "minpeplength";
+ case "maxlength": return "maxpeplength";
+ case "maxpeptidelength": return "maxpeplength";
+ case "pmtolerance": return "precursormasstolerance";
+ case "parentmasstolerance": return "precursormasstolerance";
+ default: return norm;
+ }
}
- /** Validates required-input invariants that the CLI alone can't enforce
- * (since {@code -s}/{@code -d} may come from {@code -conf}). */
- public String validateRequired() {
+ /** Validates required-input invariants and the numeric/enum range
+ * constraints the legacy {@code IntParameter.minValue}/{@code maxValue}
+ * and {@code EnumParameter} machinery used to enforce. Returns
+ * {@code null} on success or a user-facing error string otherwise.
+ *
+ * Required: {@code -s} and {@code -d} (either via CLI or {@code -conf}).
+ * Numeric flags must satisfy their original lower bounds; enum-shaped
+ * flags must fall in their defined index range. */
+ public String validate() {
if (spectrumFile == null) return "Spectrum file is not defined; use -s at the command line or SpectrumFile in a config file";
if (databaseFile == null) return "Database file is not defined; use -d at the command line or DatabaseFile in a config file";
+
+ String err;
+ if ((err = checkMin("-thread", numThreads, 1)) != null) return err;
+ if ((err = checkMin("-tasks", numTasks, -10)) != null) return err;
+ if ((err = checkMin("-minSpectraPerThread", minSpectraPerThread, 1)) != null) return err;
+ if ((err = checkMin("-minLength", minPeptideLength, 1)) != null) return err;
+ if ((err = checkMin("-maxLength", maxPeptideLength, 1)) != null) return err;
+ if ((err = checkMin("-minCharge", minCharge, 1)) != null) return err;
+ if ((err = checkMin("-maxCharge", maxCharge, 1)) != null) return err;
+ if ((err = checkMin("-n", numMatchesPerSpec, 1)) != null) return err;
+ if ((err = checkMin("-maxMissedCleavages", maxMissedCleavages, -1)) != null) return err;
+ if ((err = checkMin("-numMods", maxNumMods, 0)) != null) return err;
+ if ((err = checkMin("-minNumPeaks", minNumPeaks, 0)) != null) return err;
+ if ((err = checkMin("-iso", numIsoforms, 0)) != null) return err;
+ if ((err = checkMin("-minDeNovoScore", minDeNovoScore, Integer.MIN_VALUE)) != null) return err;
+
+ if ((err = checkRange("-ntt", numTolerableTermini, 0, 2)) != null) return err;
+ if ((err = checkRange("-tda", tdaStrategy, 0, 1)) != null) return err;
+ if ((err = checkRange("-verbose", verbose, 0, 1)) != null) return err;
+ if ((err = checkRange("-addFeatures", addFeatures, 0, 1)) != null) return err;
+ if ((err = checkRange("-allowDenseCentroidedPeaks", allowDenseCentroidedPeaks, 0, 1)) != null) return err;
+ if ((err = checkRange("-edgeScore", edgeScore, 0, 1)) != null) return err;
+ if ((err = checkRange("-ignoreMetCleavage", ignoreMetCleavage, 0, 1)) != null) return err;
+ if ((err = checkRange("-u", precursorToleranceUnits, 0, 2)) != null) return err;
+
+ if (chargeCarrierMass != null && chargeCarrierMass <= 0.1) {
+ return "Invalid value for parameter -ccm: " + chargeCarrierMass + " (must be > 0.1)";
+ }
+
+ if (fragMethodId != null && (fragMethodId < 0 || fragMethodId > 4)) {
+ return "Invalid value for parameter -m: " + fragMethodId + " (valid: 0..4)";
+ }
+ int instMax = ActivationMethodAvailability.instCount() - 1;
+ if (instrumentTypeId != null && (instrumentTypeId < 0 || instrumentTypeId > instMax)) {
+ return "Invalid value for parameter -inst: " + instrumentTypeId + " (valid: 0.." + instMax + ")";
+ }
+ int enzMax = Enzyme.getAllRegisteredEnzymes().length - 1;
+ if (enzymeId != null && (enzymeId < 0 || enzymeId > enzMax)) {
+ return "Invalid value for parameter -e: " + enzymeId + " (valid: 0.." + enzMax + ")";
+ }
+ int protMax = Protocol.getAllRegisteredProtocols().length - 1;
+ if (protocolId != null && (protocolId < 0 || protocolId > protMax)) {
+ return "Invalid value for parameter -protocol: " + protocolId + " (valid: 0.." + protMax + ")";
+ }
+ return null;
+ }
+
+ private static String checkMin(String flag, Integer value, int min) {
+ if (value == null) return null;
+ if (value < min) return "Invalid value for parameter " + flag + ": " + value + " (must be >= " + min + ")";
return null;
}
+ private static String checkRange(String flag, Integer value, int min, int max) {
+ if (value == null) return null;
+ if (value < min || value > max) return "Invalid value for parameter " + flag + ": " + value + " (valid: " + min + ".." + max + ")";
+ return null;
+ }
+
+ /** Helper that hides the {@link InstrumentType#getAllRegisteredInstrumentTypes}
+ * call from {@code validate()} so the import block stays minimal. */
+ private static final class ActivationMethodAvailability {
+ static int instCount() { return InstrumentType.getAllRegisteredInstrumentTypes().length; }
+ }
+
/** Mutator used by {@code AminoAcidSet} when the parsed mod metadata
* changes the effective max-num-mods (the AA set is authoritative once
* loaded). Mirrors the legacy {@code ParamManager.setMaxNumMods}. */
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
index 9897f010..55647240 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
@@ -330,8 +330,11 @@ public String parse(MSGFPlusOptions opts) {
if (err != null) return err;
}
- // Required-input check now that CLI + config-file have both run.
- String requiredErr = opts.validateRequired();
+ // Required-input + numeric/enum range check now that CLI +
+ // config-file have both run. Catches things like -m 99 with a
+ // user-facing error instead of the IllegalArgumentException
+ // the resolver would otherwise raise during search setup.
+ String requiredErr = opts.validate();
if (requiredErr != null) return requiredErr;
chargeCarrierMass = opts.effectiveChargeCarrierMass();
diff --git a/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java b/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java
index c900ff01..d62867eb 100644
--- a/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java
+++ b/src/test/java/edu/ucsd/msjava/cli/MSGFPlusOptionsConfigFileTest.java
@@ -57,4 +57,68 @@ public void configFileWithCustomAAParsesWithoutCrashing() throws IOException, UR
Files.deleteIfExists(conf);
Files.deleteIfExists(tmpDir);
}
+
+ /**
+ * Regression for the case-insensitive config-key match. The legacy
+ * {@code ParamManager.parseConfigParamFile} matched names with
+ * {@code equalsIgnoreCase}; the Phase 4c switch was exact-case so
+ * {@code minCharge=} / {@code maxCharge=} from the test fixture
+ * silently fell back to defaults instead of overriding them.
+ */
+ @Test
+ public void configFileKeysAreMatchedCaseInsensitively() throws IOException {
+ Path tmpDir = Files.createTempDirectory("msgfplus-caseinsens-");
+ Path conf = tmpDir.resolve("mixed_case.txt");
+ // Mix of canonical, lowercased-first-letter, and ALLCAPS forms.
+ Files.write(conf, ("MinPepLength=8\n"
+ + "maxpepLength=42\n"
+ + "MINCHARGE=3\n"
+ + "maxcharge=7\n"
+ + "TDA=1\n").getBytes(StandardCharsets.UTF_8));
+
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ Assert.assertNull(opts.applyConfigFile(conf.toFile()));
+
+ Assert.assertEquals(8, opts.effectiveMinPeptideLength());
+ Assert.assertEquals(42, opts.effectiveMaxPeptideLength());
+ Assert.assertEquals(3, opts.effectiveMinCharge());
+ Assert.assertEquals(7, opts.effectiveMaxCharge());
+ Assert.assertEquals(1, opts.effectiveTdaStrategy());
+
+ Files.deleteIfExists(conf);
+ Files.deleteIfExists(tmpDir);
+ }
+
+ /**
+ * Pin the numeric/enum range validation that the legacy
+ * {@code IntParameter.minValue}/{@code maxValue} machinery used to
+ * enforce. After Phase 4c those checks initially disappeared; restoring
+ * them ensures invalid CLI input produces a clean error string instead
+ * of a stack trace from a downstream resolver.
+ */
+ @Test
+ public void validateRejectsOutOfRangeFlags() {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ opts.spectrumFile = new File("anything.mgf");
+ opts.databaseFile = new File("anything.fasta");
+
+ opts.numThreads = 0;
+ Assert.assertNotNull("numThreads=0 must be rejected", opts.validate());
+ opts.numThreads = null;
+
+ opts.fragMethodId = 99;
+ Assert.assertNotNull("-m 99 must be rejected with a user-facing error", opts.validate());
+ opts.fragMethodId = null;
+
+ opts.numTolerableTermini = 5;
+ Assert.assertNotNull("-ntt 5 must be rejected (valid 0..2)", opts.validate());
+ opts.numTolerableTermini = null;
+
+ opts.tdaStrategy = 2;
+ Assert.assertNotNull("-tda 2 must be rejected (valid 0..1)", opts.validate());
+ opts.tdaStrategy = null;
+
+ // A clean invocation passes.
+ Assert.assertNull(opts.validate());
+ }
}
diff --git a/src/test/resources/MSGFDB_Param.txt b/src/test/resources/MSGFDB_Param.txt
index 8db2b7b9..6d33882a 100644
--- a/src/test/resources/MSGFDB_Param.txt
+++ b/src/test/resources/MSGFDB_Param.txt
@@ -1,97 +1,97 @@
-#Precursor mass tolerance
-# Examples: 2.5Da or 30ppm
-# Use comma to set asymmetric values, for example "0.5Da,2.5Da" will set 0.5Da to the left (expMass<theoMass) and 2.5Da to the right (expMass>theoMass)
-PrecursorMassTolerance=20ppm
-
-#Max Number of Modifications per peptide
-# If this value is large, the search will be slow
-NumMods=4
-
-#Modifications (see below for examples)
-StaticMod=C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
-StaticMod=229.1629, *, fix, N-term, TMT6plex
-StaticMod=229.1629, K, fix, any, TMT6plex
-
-DynamicMod=O1, M, opt, any, Oxidation # Oxidized methionine
-DynamicMod=-187.152366, K, opt, any, AcNoTMT # Residue tagged by MSGF+ with static TMT6, but is actually acetylated and does not have TMT
-
-#Fragmentation Method
-# 0 means as written in the spectrum or CID if no info (Default)
-# 1 means CID
-# 2 means ETD
-# 3 means HCD
-# 4 means Merge spectra from the same precursor (e.g. CID/ETD pairs, CID/HCD/ETD triplets)
-# (note that for Thermo instruments this is always set to 0 by the Analysis Manager since a _ScanType.txt file is created with this information on a per-scan basis)
-FragmentationMethodID=0
-
-#Instrument ID
-# 0 means Low-res LCQ/LTQ (Default for CID and ETD); use InstrumentID=0 if analyzing a dataset with low-res CID and high-res HCD spectra
-# 1 means High-res LTQ (Default for HCD; also appropriate for high res CID). Do not merge spectra (FragMethod=4) when InstrumentID is 1; scores will degrade
-# 2 means TOF
-# 3 means Q-Exactive
-# (note that this is automatically updated by the Analysis Manager based on the instrument type and MSn scan types present for a given dataset)
-InstrumentID=1
-
-#Enzyme ID
-# 0 means No enzyme used
-# 1 means Trypsin (Default); use this along with NTT=0 for a no-enzyme search of a tryptically digested sample
-# 2: Chymotrypsin, 3: Lys-C, 4: Lys-N, 5: Glu-C, 6: Arg-C, 7: Asp-N, 8: alphaLP, 9: No Enzyme (for peptidomics)
-EnzymeID=1
-
-#Isotope error range
-# Takes into account of the error introduced by choosing non-monoisotopic peak for fragmentation.
-# Useful for accurate precursor ion masses
-# Ignored if the parent mass tolerance is > 0.5Da or 500ppm
-# The combination of -t and -ti determins the precursor mass tolerance.
-# e.g. "-t 20ppm -ti -1,2" tests abs(exp-calc-n*1.00335Da)<20ppm for n=-1, 0, 1, 2.
-IsotopeErrorRange=-1,2
-
-#Number of tryptic termini
-# The number of peptide termini that must have been cleaved by the enzyme (default 1)
-# For trypsin, 2 means fully tryptic only, 1 means partially tryptic, and 0 means no-enzyme search
-NTT=2
-
-#Target/Decoy search mode
-# 0 means don't search decoy database (default)
-# 1 means search decoy database to compute FDR (source FASTA file must be forward-only proteins)
-TDA=1
-
-#Number of Threads (by default, uses all available cores)
-#In DMS, a settings file entry for MSGFDBThreads will override this value
-NumThreads=All
-
-#Minimum peptide length to consider
-MinPepLength=9
-
-#Maximum peptide length to consider
-MaxPepLength=50
-
-#Minimum precursor charge to consider (if not specified in the spectrum)
-minCharge=2
-
-#Maximum precursor charge to consider (if not specified in the spectrum)
-maxCharge=5
-
-#Number of matches per spectrum to be reported
-#If this value is greater than 1 then the FDR values computed by MS-GF+ will be skewed by high-scoring 2nd and 3rd hits
-NumMatchesPerSpec=2
-
-#Amino Acid Modification Examples
-# Specify static modifications using one or more StaticMod= entries
-# Specify dynamic modifications using one or more DynamicMod= entries
-# Modification format is:
-# Mass or CompositionString, Residues, ModType, Position, Name (all the five fields are required).
-# CompositionString can only contain a limited set of elements, primarily C H N O S or P
-#
-# Examples:
-# C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
-# O1, M, opt, any, Oxidation # Oxidation M
-# 15.994915, M, opt, any, Oxidation # Oxidation M (mass is used instead of CompositionStr)
-# H-1N-1O1, NQ, opt, any, Deamidated # Negative numbers are allowed.
-# CH2, K, opt, any, Methyl # Methylation K
-# C2H2O1, K, opt, any, Acetyl # Acetylation K
-# HO3P, STY,opt, any, Phospho # Phosphorylation STY
-# C2H3NO, *, opt, N-term, Carbamidomethyl # Variable Carbamidomethyl N-term
-# H-2O-1, E, opt, N-term, Glu->pyro-Glu # Pyro-glu from E
-# H-3N-1, Q, opt, N-term, Gln->pyro-Glu # Pyro-glu from Q
-# C2H2O, *, opt, Prot-N-term, Acetyl # Acetylation Protein N-term
+#Precursor mass tolerance
+# Examples: 2.5Da or 30ppm
+# Use comma to set asymmetric values, for example "0.5Da,2.5Da" will set 0.5Da to the left (expMass<theoMass) and 2.5Da to the right (expMass>theoMass)
+PrecursorMassTolerance=20ppm
+
+#Max Number of Modifications per peptide
+# If this value is large, the search will be slow
+NumMods=4
+
+#Modifications (see below for examples)
+StaticMod=C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
+StaticMod=229.1629, *, fix, N-term, TMT6plex
+StaticMod=229.1629, K, fix, any, TMT6plex
+
+DynamicMod=O1, M, opt, any, Oxidation # Oxidized methionine
+DynamicMod=-187.152366, K, opt, any, AcNoTMT # Residue tagged by MSGF+ with static TMT6, but is actually acetylated and does not have TMT
+
+#Fragmentation Method
+# 0 means as written in the spectrum or CID if no info (Default)
+# 1 means CID
+# 2 means ETD
+# 3 means HCD
+# 4 means Merge spectra from the same precursor (e.g. CID/ETD pairs, CID/HCD/ETD triplets)
+# (note that for Thermo instruments this is always set to 0 by the Analysis Manager since a _ScanType.txt file is created with this information on a per-scan basis)
+FragmentationMethodID=0
+
+#Instrument ID
+# 0 means Low-res LCQ/LTQ (Default for CID and ETD); use InstrumentID=0 if analyzing a dataset with low-res CID and high-res HCD spectra
+# 1 means High-res LTQ (Default for HCD; also appropriate for high res CID). Do not merge spectra (FragMethod=4) when InstrumentID is 1; scores will degrade
+# 2 means TOF
+# 3 means Q-Exactive
+# (note that this is automatically updated by the Analysis Manager based on the instrument type and MSn scan types present for a given dataset)
+InstrumentID=1
+
+#Enzyme ID
+# 0 means No enzyme used
+# 1 means Trypsin (Default); use this along with NTT=0 for a no-enzyme search of a tryptically digested sample
+# 2: Chymotrypsin, 3: Lys-C, 4: Lys-N, 5: Glu-C, 6: Arg-C, 7: Asp-N, 8: alphaLP, 9: No Enzyme (for peptidomics)
+EnzymeID=1
+
+#Isotope error range
+# Accounts for the error introduced by choosing a non-monoisotopic peak for fragmentation.
+# Useful for accurate precursor ion masses
+# Ignored if the parent mass tolerance is > 0.5Da or 500ppm
+# The combination of -t and -ti determines the precursor mass tolerance.
+# e.g. "-t 20ppm -ti -1,2" tests abs(exp-calc-n*1.00335Da)<20ppm for n=-1, 0, 1, 2.
+IsotopeErrorRange=-1,2
+
+#Number of tryptic termini
+# The number of peptide termini that must have been cleaved by the enzyme (default 1)
+# For trypsin, 2 means fully tryptic only, 1 means partially tryptic, and 0 means no-enzyme search
+NTT=2
+
+#Target/Decoy search mode
+# 0 means don't search decoy database (default)
+# 1 means search decoy database to compute FDR (source FASTA file must be forward-only proteins)
+TDA=1
+
+#Number of Threads (by default, uses all available cores)
+#In DMS, a settings file entry for MSGFDBThreads will override this value
+NumThreads=All
+
+#Minimum peptide length to consider
+MinPepLength=9
+
+#Maximum peptide length to consider
+MaxPepLength=50
+
+#Minimum precursor charge to consider (if not specified in the spectrum)
+minCharge=2
+
+#Maximum precursor charge to consider (if not specified in the spectrum)
+maxCharge=5
+
+#Number of matches per spectrum to be reported
+#If this value is greater than 1 then the FDR values computed by MS-GF+ will be skewed by high-scoring 2nd and 3rd hits
+NumMatchesPerSpec=2
+
+#Amino Acid Modification Examples
+# Specify static modifications using one or more StaticMod= entries
+# Specify dynamic modifications using one or more DynamicMod= entries
+# Modification format is:
+# Mass or CompositionString, Residues, ModType, Position, Name (all the five fields are required).
+# CompositionString can only contain a limited set of elements, primarily C H N O S or P
+#
+# Examples:
+# C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
+# O1, M, opt, any, Oxidation # Oxidation M
+# 15.994915, M, opt, any, Oxidation # Oxidation M (mass is used instead of CompositionStr)
+# H-1N-1O1, NQ, opt, any, Deamidated # Negative numbers are allowed.
+# CH2, K, opt, any, Methyl # Methylation K
+# C2H2O1, K, opt, any, Acetyl # Acetylation K
+# HO3P, STY,opt, any, Phospho # Phosphorylation STY
+# C2H3NO, *, opt, N-term, Carbamidomethyl # Variable Carbamidomethyl N-term
+# H-2O-1, E, opt, N-term, Glu->pyro-Glu # Pyro-glu from E
+# H-3N-1, Q, opt, N-term, Gln->pyro-Glu # Pyro-glu from Q
+# C2H2O, *, opt, Prot-N-term, Acetyl # Acetylation Protein N-term
From 8330bc3473e3750da2112a47972918e2d7b21395 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 11:02:25 +0100
Subject: [PATCH 26/34] refactor(cli): typed enums for -outputFormat and
-precursorCal
Drop the lingering String + numeric backcompat for the two enum-shaped
flags whose values are real names rather than IDs (per user direction:
'unless are options like 1 2 3 we should do only string'). After this
commit:
- -outputFormat accepts only `pin` / `tsv` (case-insensitive). The
legacy numeric forms `0` / `1` are no longer recognised; users on
those invocations should switch to the named values. This is a
deliberate breaking change called out in the parameter-modernization
cleanup -- consistency over backcompat in this corner.
- -precursorCal continues to accept only `auto` / `on` / `off`
(case-insensitive), but now via picocli's typed enum matcher rather
than a String + fromString fallback. Invalid values fail fast at
parse time instead of silently mapping to AUTO.
Numeric-ID flags (-m, -inst, -e, -protocol) and 0/1 boolean-style
flags (-tda, -verbose, -addFeatures, -allowDenseCentroidedPeaks,
-edgeScore, -ignoreMetCleavage, -u, -ntt) keep their integer types --
those values are IDs, not names.
Implementation:
- New cli.OutputFormat enum (PIN, TSV).
- MSGFPlusOptions.outputFormat: String -> OutputFormat.
- MSGFPlusOptions.precursorCalMode: String -> SearchParams.PrecursorCalMode.
- effectiveOutputFormat() now returns OutputFormat (was int 0/1).
- effectivePrecursorCalRaw() collapsed into effectivePrecursorCal()
returning the typed enum.
- applyConfigEntry parses both flags via Enum.valueOf so config-file
values like `OutputFormat=pin` and `PrecursorCal=auto` flow through
the same case-insensitive contract as the CLI.
- SearchParams.outputFormat field: int -> OutputFormat. writePin() /
writeTsv() helpers retained (callers in MSGFPlus.runMSGFPlus).
- SearchParams.PrecursorCalMode.fromString() deleted -- no callers
after the resolver returns the typed enum directly.
- New static factory MSGFPlusOptions.commandLine(opts) returns a
CommandLine with caseInsensitiveEnumValuesAllowed(true). All
call sites (MSGFPlus.main + 5 test files) routed through it so
enum case-insensitivity is uniform.
- docs/output.md updated to show `-outputFormat pin` / `tsv` and
notes the numeric forms are no longer accepted.
Tests: TestDirectPinWriter.outputFormatAcceptsOnlyPinAndTsv pins the
new contract (numeric/legacy values rejected, named values accepted
case-insensitively). TestPrecursorCalScaffolding migrated to enum
constants and to a picocli rejection check for invalid values. The
old fromString-fallback test is replaced by the rejection test.
Scoped sweep: 78 tests, 0 failures, 0 errors, 4 skipped.
---
docs/output.md | 6 ++-
.../java/edu/ucsd/msjava/cli/MSGFPlus.java | 2 +-
.../edu/ucsd/msjava/cli/MSGFPlusOptions.java | 28 ++++++-------
.../edu/ucsd/msjava/cli/OutputFormat.java | 17 ++++++++
.../ucsd/msjava/msdbsearch/SearchParams.java | 37 ++++--------------
src/test/java/msgfplus/TestCollaboration.java | 2 +-
.../java/msgfplus/TestDirectPinWriter.java | 39 +++++++++++--------
src/test/java/msgfplus/TestIPRG.java | 2 +-
.../msgfplus/TestMinSpectraPerThread.java | 4 +-
src/test/java/msgfplus/TestPercolator.java | 2 +-
.../msgfplus/TestPrecursorCalIntegration.java | 7 ++--
.../msgfplus/TestPrecursorCalScaffolding.java | 30 +++++++-------
12 files changed, 94 insertions(+), 82 deletions(-)
create mode 100644 src/main/java/edu/ucsd/msjava/cli/OutputFormat.java
diff --git a/docs/output.md b/docs/output.md
index bb840273..f091479c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -8,8 +8,10 @@ Select the format with `-outputFormat`:
| Flag | Format | Extension | Typical use |
|---|---|---|---|
-| `-outputFormat 0` (default) | Percolator `.pin` | `.pin` | Feed to Percolator / MS²Rescore / Mokapot for FDR-calibrated rescoring |
-| `-outputFormat 1` | Tab-separated values | `.tsv` | Direct inspection / downstream tools that consume TSV |
+| `-outputFormat pin` (default) | Percolator `.pin` | `.pin` | Feed to Percolator / MS²Rescore / Mokapot for FDR-calibrated rescoring |
+| `-outputFormat tsv` | Tab-separated values | `.tsv` | Direct inspection / downstream tools that consume TSV |
+
+`-outputFormat` accepts the named values `pin` and `tsv` (case-insensitive). Numeric forms (`0`, `1`) accepted by older releases are no longer recognised — pass the named value instead.
The output path (`-o`) must use the matching extension. If `-o` is omitted, MS-GF+ writes `.pin` (or `.tsv`) in the spectrum file's directory.
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java
index bdc330cb..a75de448 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java
@@ -53,7 +53,7 @@ public static void main(String argv[]) {
argvSnapshot = argv == null ? new String[0] : argv.clone();
MSGFPlusOptions opts = new MSGFPlusOptions();
- CommandLine cl = new CommandLine(opts);
+ CommandLine cl = MSGFPlusOptions.commandLine(opts);
if (argv.length == 0) {
printToolInfo();
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
index 0991add6..453b0d87 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
@@ -1,9 +1,11 @@
package edu.ucsd.msjava.cli;
+import edu.ucsd.msjava.msdbsearch.SearchParams.PrecursorCalMode;
import edu.ucsd.msjava.msutil.ActivationMethod;
import edu.ucsd.msjava.msutil.Enzyme;
import edu.ucsd.msjava.msutil.InstrumentType;
import edu.ucsd.msjava.msutil.Protocol;
+import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
@@ -31,6 +33,13 @@
description = "MS-GF+: peptide identification by database search of mass spectra.")
public final class MSGFPlusOptions {
+ /** Build a {@link CommandLine} configured for MS-GF+: enums match
+ * case-insensitively (so {@code -outputFormat pin} and {@code -outputFormat PIN}
+ * both work) and the parser uses the standard MS-GF+ usage layout. */
+ public static CommandLine commandLine(MSGFPlusOptions opts) {
+ return new CommandLine(opts).setCaseInsensitiveEnumValuesAllowed(true);
+ }
+
// ---------- input (required at runtime, but may be provided via -conf) ----------
@Option(names = "-s", paramLabel = "SpectrumFile",
@@ -154,11 +163,11 @@ public final class MSGFPlusOptions {
@Option(names = "-outputFormat", paramLabel = "Format",
description = "Output format: pin (Default) or tsv")
- public String outputFormat;
+ public OutputFormat outputFormat;
@Option(names = "-precursorCal", paramLabel = "Mode",
description = "Precursor calibration mode: auto (Default), on, off")
- public String precursorCalMode;
+ public PrecursorCalMode precursorCalMode;
@Option(names = "-ccm", paramLabel = "Mass",
description = "Charge carrier mass; Default: 1.00727649 (proton)")
@@ -252,15 +261,8 @@ public final class MSGFPlusOptions {
public double effectiveChargeCarrierMass() { return chargeCarrierMass != null ? chargeCarrierMass : 1.00727649; }
public String effectiveDecoyPrefix() { return decoyPrefix != null ? decoyPrefix : "XXX"; }
- public String effectivePrecursorCalRaw() { return precursorCalMode != null ? precursorCalMode : "auto"; }
-
- /** 0 = pin (default), 1 = tsv. */
- public int effectiveOutputFormat() {
- if (outputFormat == null) return 0;
- String n = outputFormat.trim().toLowerCase();
- if (n.equals("tsv") || n.equals("1")) return 1;
- return 0;
- }
+ public PrecursorCalMode effectivePrecursorCal() { return precursorCalMode != null ? precursorCalMode : PrecursorCalMode.AUTO; }
+ public OutputFormat effectiveOutputFormat() { return outputFormat != null ? outputFormat : OutputFormat.PIN; }
public PrecursorTolerance effectivePrecursorTolerance() {
return precursorTolerance != null ? precursorTolerance : PrecursorTolerance.parse("20ppm");
@@ -403,8 +405,8 @@ private String applyConfigEntry(String key, String value, String fileName) {
case "verbose": if (verbose == null) verbose = Integer.parseInt(value); return null;
case "tda": if (tdaStrategy == null) tdaStrategy = Integer.parseInt(value); return null;
case "addfeatures": if (addFeatures == null) addFeatures = Integer.parseInt(value); return null;
- case "outputformat": if (outputFormat == null) outputFormat = value; return null;
- case "precursorcal": if (precursorCalMode == null) precursorCalMode = value; return null;
+ case "outputformat": if (outputFormat == null) outputFormat = OutputFormat.valueOf(value.trim().toUpperCase(java.util.Locale.ROOT)); return null;
+ case "precursorcal": if (precursorCalMode == null) precursorCalMode = PrecursorCalMode.valueOf(value.trim().toUpperCase(java.util.Locale.ROOT)); return null;
case "chargecarriermass": if (chargeCarrierMass == null) chargeCarrierMass = Double.parseDouble(value); return null;
case "maxmissedcleavages": if (maxMissedCleavages == null) maxMissedCleavages = Integer.parseInt(value); return null;
case "nummods": if (maxNumMods == null) configMaxNumMods = Integer.parseInt(value); return null;
diff --git a/src/main/java/edu/ucsd/msjava/cli/OutputFormat.java b/src/main/java/edu/ucsd/msjava/cli/OutputFormat.java
new file mode 100644
index 00000000..2e570882
--- /dev/null
+++ b/src/main/java/edu/ucsd/msjava/cli/OutputFormat.java
@@ -0,0 +1,17 @@
+package edu.ucsd.msjava.cli;
+
+/**
+ * Search output format selected by {@code -outputFormat}. Picocli matches
+ * incoming values case-insensitively when the parser is built via
+ * {@code MSGFPlusOptions.commandLine(opts)}, which enables that setting.
+ *
+ * Numeric forms ({@code 0} / {@code 1}) accepted by older releases are
+ * intentionally not supported. Users on legacy invocations should switch
+ * to the named values.
+ */
+public enum OutputFormat {
+ /** Percolator {@code .pin} (default). */
+ PIN,
+ /** Tab-separated values, direct inspection / downstream tools. */
+ TSV
+}
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
index 55647240..1bcdda6c 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
@@ -2,6 +2,7 @@
import edu.ucsd.msjava.cli.IntRange;
import edu.ucsd.msjava.cli.MSGFPlusOptions;
+import edu.ucsd.msjava.cli.OutputFormat;
import edu.ucsd.msjava.cli.PrecursorTolerance;
import edu.ucsd.msjava.msgf.Tolerance;
import edu.ucsd.msjava.msutil.*;
@@ -32,28 +33,7 @@ public class SearchParams {
public enum PrecursorCalMode {
AUTO,
ON,
- OFF;
-
- /**
- * Case-insensitive string to enum conversion. Unknown values fall
- * back to {@link #AUTO} so that downstream code never crashes if a
- * typo slips past CLI parsing.
- */
- public static PrecursorCalMode fromString(String s) {
- if (s == null) return AUTO;
- String normalized = s.trim().toLowerCase();
- switch (normalized) {
- case "on":
- return ON;
- case "off":
- return OFF;
- case "auto":
- case "":
- return AUTO;
- default:
- return AUTO;
- }
- }
+ OFF
}
private List dbSearchIOList;
@@ -94,7 +74,7 @@ public static PrecursorCalMode fromString(String s) {
private boolean allowDenseCentroidedPeaks;
private int minMSLevel;
private int maxMSLevel;
- private int outputFormat; // 0=pin (default), 1=tsv — mzid output removed
+ private OutputFormat outputFormat;
private PrecursorCalMode precursorCalMode = PrecursorCalMode.AUTO;
public SearchParams() {
@@ -287,17 +267,16 @@ public int getMaxMSLevel() {
return maxMSLevel;
}
- /** 0=pin (default), 1=tsv. */
- public int getOutputFormat() {
+ public OutputFormat getOutputFormat() {
return outputFormat;
}
public boolean writeTsv() {
- return outputFormat == 1;
+ return outputFormat == OutputFormat.TSV;
}
public boolean writePin() {
- return outputFormat == 0;
+ return outputFormat == OutputFormat.PIN;
}
/**
@@ -350,7 +329,7 @@ public String parse(MSGFPlusOptions opts) {
}
dbSearchIOList = new ArrayList<>();
- String defaultExt = outputFormat == 1 ? ".tsv" : ".pin";
+ String defaultExt = outputFormat == OutputFormat.TSV ? ".tsv" : ".pin";
if (!specPath.isDirectory()) {
SpecFileFormat specFormat = SpecFileFormat.getSpecFileFormat(specPath.getName());
@@ -487,7 +466,7 @@ public String parse(MSGFPlusOptions opts) {
}
allowDenseCentroidedPeaks = opts.effectiveAllowDenseCentroidedPeaks() == 1;
- precursorCalMode = PrecursorCalMode.fromString(opts.effectivePrecursorCalRaw());
+ precursorCalMode = opts.effectivePrecursorCal();
IntRange ms = opts.effectiveMSLevel();
minMSLevel = ms.min;
diff --git a/src/test/java/msgfplus/TestCollaboration.java b/src/test/java/msgfplus/TestCollaboration.java
index 246ac30b..7edb50db 100644
--- a/src/test/java/msgfplus/TestCollaboration.java
+++ b/src/test/java/msgfplus/TestCollaboration.java
@@ -28,7 +28,7 @@ public void testSujunLiIndiana()
MSGFPlusOptions paramManager = new MSGFPlusOptions();
- String msg = null; new CommandLine(paramManager).parseArgs(argv);
+ String msg = null; MSGFPlusOptions.commandLine(paramManager).parseArgs(argv);
if(msg != null)
System.out.println(msg);
assertTrue(msg == null);
diff --git a/src/test/java/msgfplus/TestDirectPinWriter.java b/src/test/java/msgfplus/TestDirectPinWriter.java
index 09b72600..14b9d76c 100644
--- a/src/test/java/msgfplus/TestDirectPinWriter.java
+++ b/src/test/java/msgfplus/TestDirectPinWriter.java
@@ -1,12 +1,14 @@
package msgfplus;
import edu.ucsd.msjava.cli.MSGFPlusOptions;
+import edu.ucsd.msjava.cli.OutputFormat;
import edu.ucsd.msjava.msdbsearch.DatabaseMatch;
import edu.ucsd.msjava.msdbsearch.SearchParams;
import edu.ucsd.msjava.msdbsearch.SearchParamsTest;
import edu.ucsd.msjava.msutil.ActivationMethod;
import edu.ucsd.msjava.msutil.Enzyme;
import edu.ucsd.msjava.output.DirectPinWriter;
+import picocli.CommandLine;
import org.junit.Assert;
import org.junit.Test;
@@ -42,14 +44,14 @@ private MSGFPlusOptions buildOpts() throws URISyntaxException {
@Test
public void pinOutputFormatFlagIsAccepted() throws URISyntaxException {
MSGFPlusOptions opts = buildOpts();
- opts.outputFormat = "pin";
- Assert.assertEquals(0, opts.effectiveOutputFormat());
+ opts.outputFormat = OutputFormat.PIN;
+ Assert.assertEquals(OutputFormat.PIN, opts.effectiveOutputFormat());
}
@Test
public void writePinGetterReflectsOutputFormat() throws URISyntaxException {
MSGFPlusOptions opts = buildOpts();
- opts.outputFormat = "pin";
+ opts.outputFormat = OutputFormat.PIN;
SearchParams params = new SearchParams();
Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
@@ -59,26 +61,31 @@ public void writePinGetterReflectsOutputFormat() throws URISyntaxException {
}
@Test
- public void allOutputFormatEnumIndicesAreAccepted() throws URISyntaxException {
- // Valid outputFormat values after mzid removal: pin (default) and tsv.
- for (String value : new String[]{"pin", "tsv", "0", "1"}) {
- MSGFPlusOptions opts = buildOpts();
- opts.outputFormat = value;
- int eff = opts.effectiveOutputFormat();
- Assert.assertTrue("'" + value + "' should map to 0 or 1 but got " + eff, eff == 0 || eff == 1);
+ public void outputFormatAcceptsOnlyPinAndTsv() throws URISyntaxException {
+ // MSGFPlusOptions.commandLine(opts) enables case-insensitive enum matching in picocli.
+ for (String value : new String[]{"pin", "PIN", "Pin", "tsv", "TSV", "Tsv"}) {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ MSGFPlusOptions.commandLine(opts).parseArgs("-outputFormat", value);
+ Assert.assertNotNull("'" + value + "' should parse to a valid OutputFormat", opts.outputFormat);
}
- // Regression gate: old "mzid" and "both" (2, 3) collapse to pin.
- for (String value : new String[]{"mzid", "both", "2", "3"}) {
- MSGFPlusOptions opts = buildOpts();
- opts.outputFormat = value;
- Assert.assertEquals("Removed format '" + value + "' must collapse to pin (0)", 0, opts.effectiveOutputFormat());
+ // Numeric forms (0/1) and removed legacy values (mzid, both, 2, 3) are
+ // intentionally rejected -- the typed enum is part of the consistency
+ // sweep called out in the parameter-modernization cleanup.
+ for (String value : new String[]{"0", "1", "2", "3", "mzid", "both", ""}) {
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ try {
+ MSGFPlusOptions.commandLine(opts).parseArgs("-outputFormat", value);
+ Assert.fail("'" + value + "' should be rejected by picocli enum matching");
+ } catch (CommandLine.ParameterException expected) {
+ // ok
+ }
}
}
@Test
public void pinHeaderColumnsIncludeRequiredPercolatorFields() throws Exception {
MSGFPlusOptions opts = buildOpts();
- opts.outputFormat = "pin";
+ opts.outputFormat = OutputFormat.PIN;
SearchParams params = new SearchParams();
Assert.assertNull(params.parse(opts));
diff --git a/src/test/java/msgfplus/TestIPRG.java b/src/test/java/msgfplus/TestIPRG.java
index 8180ead0..51b46496 100644
--- a/src/test/java/msgfplus/TestIPRG.java
+++ b/src/test/java/msgfplus/TestIPRG.java
@@ -34,7 +34,7 @@ public void countProteins()
MSGFPlusOptions paramManager = new MSGFPlusOptions();
- String msg = null; new CommandLine(paramManager).parseArgs(argv);
+ String msg = null; MSGFPlusOptions.commandLine(paramManager).parseArgs(argv);
if(msg != null)
System.err.println("Error: " + msg);
assertTrue(msg == null);
diff --git a/src/test/java/msgfplus/TestMinSpectraPerThread.java b/src/test/java/msgfplus/TestMinSpectraPerThread.java
index 42863ed4..eea5074e 100644
--- a/src/test/java/msgfplus/TestMinSpectraPerThread.java
+++ b/src/test/java/msgfplus/TestMinSpectraPerThread.java
@@ -16,7 +16,7 @@ public void defaultIs250() {
@Test
public void overrideAppliesThroughGetter() {
MSGFPlusOptions opts = new MSGFPlusOptions();
- new CommandLine(opts).parseArgs("-minSpectraPerThread", "50");
+ MSGFPlusOptions.commandLine(opts).parseArgs("-minSpectraPerThread", "50");
Assert.assertEquals(50, opts.effectiveMinSpectraPerThread());
}
@@ -26,7 +26,7 @@ public void parsesZero() {
// so '0' is parseable here. Range checks moved to SearchParams.parse
// (which would reject zero earlier in the search-engine flow if needed).
MSGFPlusOptions opts = new MSGFPlusOptions();
- new CommandLine(opts).parseArgs("-minSpectraPerThread", "0");
+ MSGFPlusOptions.commandLine(opts).parseArgs("-minSpectraPerThread", "0");
Assert.assertEquals(0, opts.effectiveMinSpectraPerThread());
}
}
diff --git a/src/test/java/msgfplus/TestPercolator.java b/src/test/java/msgfplus/TestPercolator.java
index 2ab91cd3..b61d23e7 100644
--- a/src/test/java/msgfplus/TestPercolator.java
+++ b/src/test/java/msgfplus/TestPercolator.java
@@ -22,7 +22,7 @@ public void testAddFeatures() throws URISyntaxException {
String[] argv = {"-s", specFile.getPath(), "-d", dbFile.getPath(), "-addFeatures", "1", "-m", "3"};
MSGFPlusOptions opts = new MSGFPlusOptions();
- new CommandLine(opts).parseArgs(argv);
+ MSGFPlusOptions.commandLine(opts).parseArgs(argv);
assertTrue(MSGFPlus.runMSGFPlus(opts) == null);
}
diff --git a/src/test/java/msgfplus/TestPrecursorCalIntegration.java b/src/test/java/msgfplus/TestPrecursorCalIntegration.java
index d20e34ed..fb85c668 100644
--- a/src/test/java/msgfplus/TestPrecursorCalIntegration.java
+++ b/src/test/java/msgfplus/TestPrecursorCalIntegration.java
@@ -2,6 +2,7 @@
import edu.ucsd.msjava.cli.MSGFPlus;
import edu.ucsd.msjava.cli.MSGFPlusOptions;
+import edu.ucsd.msjava.msdbsearch.SearchParams.PrecursorCalMode;
import edu.ucsd.msjava.msdbsearch.SearchParamsTest;
import edu.ucsd.msjava.msutil.DBSearchIOFiles;
import edu.ucsd.msjava.msutil.SpecFileFormat;
@@ -63,7 +64,7 @@ public void precursorCalOffMatchesBaseline() throws Exception {
File baselineOut = new File(workDir.toFile(), "baseline.pin");
MSGFPlusOptions offManager = buildOpts(offOut);
- offManager.precursorCalMode = "off";
+ offManager.precursorCalMode = PrecursorCalMode.OFF;
String offErr = MSGFPlus.runMSGFPlus(offManager);
Assert.assertNull("runMSGFPlus(off) failed: " + offErr, offErr);
Assert.assertTrue("off.pin must exist", offOut.exists());
@@ -106,11 +107,11 @@ public void precursorCalOffIsDeterministic() throws Exception {
File secondOut = new File(workDir.toFile(), "second.pin");
MSGFPlusOptions firstManager = buildOpts(firstOut);
- firstManager.precursorCalMode = "off";
+ firstManager.precursorCalMode = PrecursorCalMode.OFF;
Assert.assertNull(MSGFPlus.runMSGFPlus(firstManager));
MSGFPlusOptions secondManager = buildOpts(secondOut);
- secondManager.precursorCalMode = "off";
+ secondManager.precursorCalMode = PrecursorCalMode.OFF;
Assert.assertNull(MSGFPlus.runMSGFPlus(secondManager));
List firstPsms = extractPsmItems(firstOut);
diff --git a/src/test/java/msgfplus/TestPrecursorCalScaffolding.java b/src/test/java/msgfplus/TestPrecursorCalScaffolding.java
index 102f3b0b..8f1c5e80 100644
--- a/src/test/java/msgfplus/TestPrecursorCalScaffolding.java
+++ b/src/test/java/msgfplus/TestPrecursorCalScaffolding.java
@@ -50,7 +50,7 @@ public void precursorCalDefaultIsAuto() throws URISyntaxException {
@Test
public void precursorCalOnIsParsed() throws URISyntaxException {
MSGFPlusOptions opts = buildOpts();
- opts.precursorCalMode = "on";
+ opts.precursorCalMode = PrecursorCalMode.ON;
SearchParams params = new SearchParams();
Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
Assert.assertEquals(PrecursorCalMode.ON, params.getPrecursorCalMode());
@@ -59,7 +59,7 @@ public void precursorCalOnIsParsed() throws URISyntaxException {
@Test
public void precursorCalOffIsParsed() throws URISyntaxException {
MSGFPlusOptions opts = buildOpts();
- opts.precursorCalMode = "off";
+ opts.precursorCalMode = PrecursorCalMode.OFF;
SearchParams params = new SearchParams();
Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
Assert.assertEquals(PrecursorCalMode.OFF, params.getPrecursorCalMode());
@@ -67,20 +67,24 @@ public void precursorCalOffIsParsed() throws URISyntaxException {
@Test
public void precursorCalIsCaseInsensitive() throws URISyntaxException {
- MSGFPlusOptions opts = buildOpts();
- opts.precursorCalMode = "OFF";
- SearchParams params = new SearchParams();
- Assert.assertNull("SearchParams.parse should succeed", params.parse(opts));
- Assert.assertEquals(PrecursorCalMode.OFF, params.getPrecursorCalMode());
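+ // Upper-case "OFF" must parse: picocli matches enum values case-insensitively only after CommandLine#setCaseInsensitiveEnumValuesAllowed(true) (it is not a @Command attribute); presumably MSGFPlusOptions.commandLine(...) sets it.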
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ MSGFPlusOptions.commandLine(opts).parseArgs("-precursorCal", "OFF");
+ Assert.assertEquals(PrecursorCalMode.OFF, opts.precursorCalMode);
}
@Test
- public void unknownPrecursorCalValueFallsBackToAuto() {
- // Unit-level contract: unknown strings must not crash the search path;
- // instead they silently fall back to AUTO.
- Assert.assertEquals(PrecursorCalMode.AUTO, PrecursorCalMode.fromString("bogus"));
- Assert.assertEquals(PrecursorCalMode.AUTO, PrecursorCalMode.fromString(null));
- Assert.assertEquals(PrecursorCalMode.AUTO, PrecursorCalMode.fromString(""));
+ public void unknownPrecursorCalValueIsRejected() {
+ // The typed enum replaces the previous String + fromString fallback;
+ // invalid values are now rejected by picocli at parse time instead
+ // of silently mapping to AUTO.
+ MSGFPlusOptions opts = new MSGFPlusOptions();
+ try {
+ MSGFPlusOptions.commandLine(opts).parseArgs("-precursorCal", "bogus");
+ Assert.fail("'bogus' should not parse as a PrecursorCalMode");
+ } catch (picocli.CommandLine.ParameterException expected) {
+ // ok
+ }
}
@Test
From b7dce4cab2023b59814198def29896a7676af104 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 11:07:45 +0100
Subject: [PATCH 27/34] docs(changelog): document parameter-modernization sweep
in vNEXT
Expand the vNEXT entry to cover the full PR #25 modernization stack:
- CLI parser modernisation: picocli-driven MSGFPlusOptions; -conf
flow with case-insensitive keys + 13 legacy aliases preserved;
numeric/enum range validation surface restored.
- Breaking changes: -outputFormat numeric forms (0/1) removed in
favour of named pin/tsv (typed enum); -precursorCal switched to
typed enum (rejects unknown values instead of silently mapping
to AUTO); spectrum input narrowed to mzML + mgf only (mzXML, MS2,
PKL, _dta.txt parsers deleted); deprecated MSGFDB entry point and
its dead MSGF/MSGFLib siblings removed.
- Internal refactor: edu.ucsd.msjava.params package deleted (~2,100
LOC across 18 classes); package reorg (ui/ -> cli/, mzid/ -> output/,
parser/ -> mgf/, net.pempek.unicode -> mgf); new typed value
classes (MSGFPlusOptions, PrecursorTolerance, IntRange, OutputFormat);
picocli 4.7.6 dep added.
- Bench gate: prior Astral 3-arm run confirmed bit-identical PSM
target/decoy counts (89,479 / 46,792) between baseline and new
branch in -precursorCal off mode. Table embedded in the entry.
- Earlier in cycle: precursor calibration + Percolator pin output
bullets retained.
No code change; pure docs.
---
docs/changelog.md | 139 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 130 insertions(+), 9 deletions(-)
diff --git a/docs/changelog.md b/docs/changelog.md
index 29d94d53..713e0bd5 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -2,15 +2,136 @@
[MS-GF+ Documentation home](readme.md)
-**vNEXT — Unreleased (breaking change)**
-
-- **BREAKING:** mzIdentML (`.mzid`) support fully removed — no backward-compatibility shim. MS-GF+ now writes only Percolator `.pin` (default) or TSV, and every `.mzid`-related utility has been deleted:
- - **Output:** `MZIdentMLGen`, `AnalysisProtocolCollectionGen`, `MzIDTest` — deleted. `-o foo.mzid` is rejected at argument-parse time (no extension rewrite). `Unimod` and `UnimodComposition` are retained for future PTM-aware enhancements to `DirectPinWriter` — they carry the modification-accession + mass tables that a richer pin output would need to populate the `Peptide` column with proper Unimod references.
- - **Input / legacy tools:** `MzIDToTsv` (CLI `edu.ucsd.msjava.ui.MzIDToTsv`), `MzIDParser`, `AnnotatedSpectra`, `ScoringParamGen` — deleted. Users who need to post-process legacy `.mzid` files must use MS-GF+ v2026.03.25 or earlier, or an external mzid converter.
- - `-outputFormat` now accepts only `pin` (default) and `tsv`. Integer aliases: `0=pin, 1=tsv`. Previous `0=mzid, 2=both, 3=pin` layout is rejected.
- - `-o OutputFile` must end in `.pin` or `.tsv`. `.mzid` paths are rejected.
-- Added precursor mass calibration: `-precursorCal auto|on|off` (default `auto`). Merged via PR #22.
-- Added Percolator `.pin` output with OpenMS-parity features (`enzN`, `enzC`, `enzInt`, `mass`, `lnDeltaSpecEValue`, `matchedIonRatio`, `longest_b`, `longest_y`, `longest_y_pct`) and lowercase renames (`peplen`, `charge2/3/4`, `dm`, `absdm`, `isotope_error`) for OpenMS `PercolatorAdapter` interoperability. Merged via PR #22 + this PR.
+**vNEXT — Unreleased (multiple breaking changes)**
+
+This release modernises the CLI surface and trims a large amount of
+legacy code. Net change: roughly **−2,400 LOC** vs the previous
+release, with the CLI flag contract preserved for normal users and a
+few deliberate breaking changes called out below.
+
+### CLI parser modernisation
+
+- The CLI is now driven by [picocli](https://picocli.info) via
+ `edu.ucsd.msjava.cli.MSGFPlusOptions`. All flags are declared once
+ with typed Java fields; help (`-h`/`--help`) and version (`-V`) are
+ auto-generated.
+- `-conf` config-file inputs flow through the same path: any field
+ the CLI did not set is filled in from the config file (CLI takes
+ precedence). Legacy aliases continue to be recognised, including
+ `IsotopeError` → `IsotopeErrorRange`, `FragmentationMethod` →
+ `FragmentationMethodID`, `Instrument` → `InstrumentID`, `Enzyme`
+ → `EnzymeID`, `Protocol` → `ProtocolID`, `NumTolerableTermini` →
+ `NTT`, `MinNumPeaks` → `MinNumPeaksPerSpectrum`, `MaxNumMods` /
+ `MaxNumModsPerPeptide` → `NumMods`, `MinPeptideLength` /
+ `minLength` → `MinPepLength`, `MaxPeptideLength` / `maxLength` →
+ `MaxPepLength`, `PMTolerance` / `ParentMassTolerance` →
+ `PrecursorMassTolerance`. Config-file keys are matched
+ case-insensitively (so `minCharge=`, `MinCharge=`, and `MINCHARGE=`
+ all work).
+- `DynamicMod=`, `StaticMod=`, and `CustomAA=` config-file entries
+ continue to be repeatable; each line is collected into the AA set.
+- Validation surface restored: invalid numeric values (e.g.
+ `-thread 0`, `-ntt 5`, `-tda 2`) and out-of-range enum-like IDs
+ (e.g. `-m 99`, `-inst 99`) now produce a clean user-facing error
+ string instead of a stack trace.
+
+### Breaking changes
+
+- **`-outputFormat` accepts only named values.** `pin` (default) and
+ `tsv` are the supported forms (case-insensitive). The legacy
+ numeric aliases `0` and `1` are no longer accepted; users on those
+ invocations should switch to the named values.
+- **`-precursorCal` is now a typed enum.** `auto` (default), `on`,
+ and `off` are still the only valid values; invalid values now fail
+ fast at parse time instead of silently mapping to `auto`.
+- **Spectrum input narrowed to `*.mzML` and `*.mgf`.** Support for
+ `*.mzXML`, `*.ms2`, `*.pkl`, and `*_dta.txt` has been removed
+ along with their parsers. `MgfSpectrumParser`, `BufferedLineReader`,
+ `BufferedRandomAccessLineReader`, and the shared `LineReader` /
+ `SpectrumParser` interfaces moved from `edu.ucsd.msjava.parser` to
+ `edu.ucsd.msjava.mgf` to reflect the trimmed scope.
+- **Deprecated `MSGFDB` entry point removed.** `cli.MSGFDB` (legacy
+ v8091, "08/06/2012") and `docs/ms-gfdb.md` have been deleted, along
+ with `ParamManager.addMSGFDBParams` / `addMSGFParams` /
+ `addMSGFLibParams` (the latter two were dead — no entry points
+ existed). The MSGFDB-only `ParamNameEnum` entries `C13`, `NNET`,
+ `UNIFORM_AA_PROBABILITY`, and `OUTPUT_FILE` are gone, as are the
+ `showFDR`, `showDecoy`, and `replicate` config-file keys.
+- mzIdentML (`.mzid`) support remains fully removed (the removal landed
+ in a prior commit on this branch). MS-GF+ writes only `.pin` (default)
+ or `.tsv`. Every `.mzid`-related utility has been deleted:
+ - **Output:** `MZIdentMLGen`, `AnalysisProtocolCollectionGen`,
+ `MzIDTest`. `Unimod` and `UnimodComposition` are retained for
+ future PTM-aware enhancements to `DirectPinWriter` — they carry
+ the modification-accession + mass tables a richer pin output
+ would need.
+ - **Input / legacy tools:** `MzIDToTsv` (CLI
+ `edu.ucsd.msjava.ui.MzIDToTsv`), `MzIDParser`,
+ `AnnotatedSpectra`, `ScoringParamGen`. Users who need to
+ post-process legacy `.mzid` files must use MS-GF+ v2026.03.25
+ or earlier, or an external mzid converter.
+
+### Internal refactor
+
+- The entire `edu.ucsd.msjava.params` package has been deleted
+ (~2,100 LOC across 18 classes including `ParamManager`, the
+ `Parameter` / `IntParameter` / `FloatParameter` / `IntRangeParameter`
+ / `ToleranceParameter` / `EnumParameter` / `FileParameter` /
+ `StringParameter` hierarchy, and `ParamParser`). Two small helpers
+ (`ParamObject`, `UserParam`) moved to `edu.ucsd.msjava.msutil`
+ where their `ActivationMethod` / `Enzyme` / `InstrumentType` /
+ `Protocol` consumers already live.
+- Top-level package reorganisation:
+ - `edu.ucsd.msjava.ui.MSGFPlus` → `edu.ucsd.msjava.cli.MSGFPlus`.
+ - `edu.ucsd.msjava.mzid.{DirectPinWriter,DirectTSVWriter,Unimod,UnimodComposition}`
+ → `edu.ucsd.msjava.output.*`.
+ - `edu.ucsd.msjava.parser.*` → `edu.ucsd.msjava.mgf.*` (after
+ dropping the legacy-format parsers).
+ - `net.pempek.unicode.UnicodeBOMInputStream` →
+ `edu.ucsd.msjava.mgf.UnicodeBOMInputStream`.
+ - `edu.ucsd.msjava.mslibsearch.ProcessedSpectrum` deleted (no
+ references).
+- New typed value classes in `cli/`:
+ - `MSGFPlusOptions` — picocli `@Command` with all MSGFPlus flags.
+ - `PrecursorTolerance` — symmetric or asymmetric tolerance with
+ matching-unit + non-negative validation.
+ - `IntRange` — inclusive integer range used by `-ti`, `-msLevel`,
+ `-index`.
+ - `OutputFormat` — enum (`PIN`, `TSV`).
+- `picocli` 4.7.6 added as a runtime dependency.
+- New regression tests covering the `CustomAA=` config-file path,
+ the `-m 4 = UVPD` mapping, case-insensitive config keys, and
+ out-of-range flag rejection. The full scoped test sweep includes
+ 78 tests.
+
+### Bench gate
+
+The Astral 3-arm correctness gate (`benchmark/run_astral_3arm.sh`,
+ProteoBench Module 8) on the prior modernisation pass confirmed
+**bit-identical PSM target/decoy counts** to the pre-PR #22 baseline
+JAR when `-precursorCal off` is supplied:
+
+| Arm | JAR | -precursorCal | targets | decoys |
+|---|---|---|---|---|
+| A | baseline (pre-PR #22) | n/a | 89,479 | 46,792 |
+| B | new branch | off | **89,479** | **46,792** |
+| C | new branch | auto | 89,360 | 46,913 |
+
+Arm C's small delta is the calibrator's expected effect when AUTO
+collects ≥200 confident PSMs. The CLI rewrite does not touch the
+search hot path, so this gate continues to apply for the additional
+fixes layered on top.
+
+### Earlier in this release cycle
+
+- Added precursor mass calibration: `-precursorCal auto|on|off`
+ (default `auto`). Merged via PR #22.
+- Added Percolator `.pin` output with OpenMS-parity features
+ (`enzN`, `enzC`, `enzInt`, `mass`, `lnDeltaSpecEValue`,
+ `matchedIonRatio`, `longest_b`, `longest_y`, `longest_y_pct`) and
+ lowercase column renames (`peplen`, `charge2/3/4`, `dm`, `absdm`,
+ `isotope_error`) for OpenMS `PercolatorAdapter` interoperability.
+ Merged via PR #22 + this PR.
**v2026.03.25**
From 657cc5e2ebc603ea8b8acdf1a3088ae156aa1ef7 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Mon, 27 Apr 2026 11:27:23 +0100
Subject: [PATCH 28/34] refactor: drop ~2,074 LOC of dead/redundant code (audit
pass)
Combined sweep of cuts surfaced by the post-modernization LOC audit.
Net change: 13 files, +57 / -2,131 = -2,074 LOC.
### Deleted dead classes (-2,051 LOC, 6 files)
Verified zero callers across src/main and src/test before removal:
- msscorer/ScoringParameterGenerator (732 LOC) and
ScoringParameterGeneratorWithErrors (880 LOC) -- standalone main()
scoring-param tools from the pre-modernization era. Pre-built
scoring model .tsv files are committed in resources; these
generators are not invoked at search time and have no remaining
consumers.
- output/Unimod (85 LOC) and output/UnimodComposition (133 LOC) --
residue from the deleted mzIdentML write side. Atom.java's "Unimod
mod bricks" comment is the only remaining reference and refers to
upstream data, not the deleted classes.
- msgf/ToolLauncher (154 LOC) -- abstract launcher with no concrete
implementations.
- msutil/ScoredString (67 LOC) -- duplicate of fdr/ScoredString with
no live callers (all usages resolve to the fdr.* version).
### Inlined effective*() resolvers + helper collapse (-23 LOC net)
- ~20 of the trivial `field != null ? field : default` resolvers in
MSGFPlusOptions are inlined at the SearchParams.parse call sites.
The non-trivial registry-resolving ones (effectiveActivationMethod,
effectiveInstrumentType, effectiveEnzyme, effectiveProtocol) and a
handful of frequent ones (effectiveOutputFormat,
effectiveMin/MaxPeptideLength etc.) stay since their length pays for itself.
- ActivationMethodAvailability nested class collapsed -- it only
hid one InstrumentType.getAllRegisteredInstrumentTypes().length call
that is now inline in validate().
- SearchParams.getOutputFormat() and SearchParams.writePin() removed
-- the writePin() call site in MSGFPlus.java now uses !writeTsv()
instead; getOutputFormat() had zero callers.
### stripComment dedup (-7 LOC)
The two implementations of "split-on-#-and-trim" (SearchParams.
getConfigLineWithoutComment and MSGFPlusOptions.stripComment)
collapsed: stripComment is the canonical version (now public static),
SearchParams.getConfigLineWithoutComment delegates to it, and
AminoAcidSet.parseConfigEntry calls stripComment directly.
### Validation surface expanded
MSGFPlusOptions.validate() now also rejects a -mod / ModificationFile=
path that does not exist, returning a user-facing error string. New
regression test (validateRejectsMissingModificationFile) pins both
the CLI path and the config-file path.
Verified: scoped sweep (TestDirectPinWriter, TestMSUtils, TestSA,
TestMisc, TestRunManifestWriter, SearchParamsTest, TestPercolator,
TestMinSpectraPerThread, TestPrecursorCalScaffolding,
TestCandidatePeptideGrid + ConsideringMetCleavage,
MSGFPlusOptionsConfigFileTest, MSGFPlusOptionsActivationMethodTest):
78 tests, 0 failures, 0 errors, 3 skipped.
---
.../java/edu/ucsd/msjava/cli/MSGFPlus.java | 2 +-
.../edu/ucsd/msjava/cli/MSGFPlusOptions.java | 46 +-
.../ucsd/msjava/msdbsearch/SearchParams.java | 54 +-
.../edu/ucsd/msjava/msgf/ToolLauncher.java | 154 ---
.../msscorer/ScoringParameterGenerator.java | 733 ---------------
.../ScoringParameterGeneratorWithErrors.java | 880 ------------------
.../edu/ucsd/msjava/msutil/AminoAcidSet.java | 2 +-
.../edu/ucsd/msjava/msutil/ScoredString.java | 67 --
.../java/edu/ucsd/msjava/output/Unimod.java | 85 --
.../ucsd/msjava/output/UnimodComposition.java | 133 ---
.../cli/MSGFPlusOptionsConfigFileTest.java | 27 +
.../msjava/msdbsearch/SearchParamsTest.java | 4 +-
.../java/msgfplus/TestDirectPinWriter.java | 1 -
13 files changed, 57 insertions(+), 2131 deletions(-)
delete mode 100644 src/main/java/edu/ucsd/msjava/msgf/ToolLauncher.java
delete mode 100644 src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java
delete mode 100644 src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java
delete mode 100644 src/main/java/edu/ucsd/msjava/msutil/ScoredString.java
delete mode 100644 src/main/java/edu/ucsd/msjava/output/Unimod.java
delete mode 100644 src/main/java/edu/ucsd/msjava/output/UnimodComposition.java
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java
index a75de448..31b7188e 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlus.java
@@ -594,7 +594,7 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
System.out.println("TSV file: " + outputFile.getPath());
}
- if (params.writePin()) {
+ if (!params.writeTsv()) {
DirectPinWriter pinWriter = new DirectPinWriter(params, aaSet, sa, specAcc, ioIndex);
try {
pinWriter.writeResults(resultList, outputFile);
diff --git a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
index 453b0d87..e02fe1d6 100644
--- a/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
+++ b/src/main/java/edu/ucsd/msjava/cli/MSGFPlusOptions.java
@@ -241,45 +241,12 @@ public static CommandLine commandLine(MSGFPlusOptions opts) {
public int effectiveMaxPeptideLength() { return maxPeptideLength != null ? maxPeptideLength : 40; }
public int effectiveMinCharge() { return minCharge != null ? minCharge : 2; }
public int effectiveMaxCharge() { return maxCharge != null ? maxCharge : 3; }
- public int effectiveNumMatchesPerSpec() { return numMatchesPerSpec != null ? numMatchesPerSpec : 1; }
- public int effectiveNumThreads() { return numThreads != null ? numThreads : Runtime.getRuntime().availableProcessors(); }
- public int effectiveNumTasks() { return numTasks != null ? numTasks : 0; }
public int effectiveMinSpectraPerThread() { return minSpectraPerThread != null ? minSpectraPerThread : 250; }
public int effectiveVerbose() { return verbose != null ? verbose : 0; }
public int effectiveTdaStrategy() { return tdaStrategy != null ? tdaStrategy : 0; }
- public int effectiveAddFeatures() { return addFeatures != null ? addFeatures : 0; }
- public int effectiveMaxMissedCleavages() { return maxMissedCleavages != null ? maxMissedCleavages : -1; }
public int effectiveMaxNumMods() { return maxNumMods != null ? maxNumMods : (configMaxNumMods != null ? configMaxNumMods : 3); }
- public int effectiveAllowDenseCentroidedPeaks() { return allowDenseCentroidedPeaks != null ? allowDenseCentroidedPeaks : 0; }
- public int effectiveNumTolerableTermini() { return numTolerableTermini != null ? numTolerableTermini : 2; }
- public int effectiveEdgeScore() { return edgeScore != null ? edgeScore : 0; }
- public int effectiveIgnoreMetCleavage() { return ignoreMetCleavage != null ? ignoreMetCleavage : 0; }
- public int effectiveMinNumPeaks() { return minNumPeaks != null ? minNumPeaks : edu.ucsd.msjava.sequences.Constants.MIN_NUM_PEAKS_PER_SPECTRUM; }
- public int effectiveNumIsoforms() { return numIsoforms != null ? numIsoforms : edu.ucsd.msjava.sequences.Constants.NUM_VARIANTS_PER_PEPTIDE; }
- public int effectiveMinDeNovoScore() { return minDeNovoScore != null ? minDeNovoScore : edu.ucsd.msjava.sequences.Constants.MIN_DE_NOVO_SCORE; }
- public int effectiveToleranceUnits() { return precursorToleranceUnits != null ? precursorToleranceUnits : 2; }
- public double effectiveChargeCarrierMass() { return chargeCarrierMass != null ? chargeCarrierMass : 1.00727649; }
-
- public String effectiveDecoyPrefix() { return decoyPrefix != null ? decoyPrefix : "XXX"; }
- public PrecursorCalMode effectivePrecursorCal() { return precursorCalMode != null ? precursorCalMode : PrecursorCalMode.AUTO; }
public OutputFormat effectiveOutputFormat() { return outputFormat != null ? outputFormat : OutputFormat.PIN; }
- public PrecursorTolerance effectivePrecursorTolerance() {
- return precursorTolerance != null ? precursorTolerance : PrecursorTolerance.parse("20ppm");
- }
-
- public IntRange effectiveIsotopeErrorRange() {
- return isotopeErrorRange != null ? isotopeErrorRange : new IntRange(0, 1);
- }
-
- public IntRange effectiveMSLevel() {
- return msLevel != null ? msLevel : new IntRange(2, 2);
- }
-
- public IntRange effectiveSpecIndexRange() {
- return specIndexRange != null ? specIndexRange : new IntRange(1, Integer.MAX_VALUE - 1);
- }
-
/** Resolves {@code -m} index to {@link ActivationMethod}. MSGFPlus exposes
* 0=ASWRITTEN, 1=CID, 2=ETD, 3=HCD, 4=UVPD. The registry also defines
* FUSION (merge-mode synthetic method) and PQD, but neither is exposed
@@ -430,7 +397,7 @@ private String applyConfigEntry(String key, String value, String fileName) {
}
}
- private static String stripComment(String line) {
+ public static String stripComment(String line) {
int hash = line.indexOf('#');
return (hash >= 0 ? line.substring(0, hash) : line).trim();
}
@@ -474,6 +441,9 @@ private static String canonicalConfigKey(String key) {
public String validate() {
if (spectrumFile == null) return "Spectrum file is not defined; use -s at the command line or SpectrumFile in a config file";
if (databaseFile == null) return "Database file is not defined; use -d at the command line or DatabaseFile in a config file";
+ if (modificationFile != null && !modificationFile.exists()) {
+ return "Modification file not found: " + modificationFile.getPath();
+ }
String err;
if ((err = checkMin("-thread", numThreads, 1)) != null) return err;
@@ -506,7 +476,7 @@ public String validate() {
if (fragMethodId != null && (fragMethodId < 0 || fragMethodId > 4)) {
return "Invalid value for parameter -m: " + fragMethodId + " (valid: 0..4)";
}
- int instMax = ActivationMethodAvailability.instCount() - 1;
+ int instMax = InstrumentType.getAllRegisteredInstrumentTypes().length - 1;
if (instrumentTypeId != null && (instrumentTypeId < 0 || instrumentTypeId > instMax)) {
return "Invalid value for parameter -inst: " + instrumentTypeId + " (valid: 0.." + instMax + ")";
}
@@ -533,12 +503,6 @@ private static String checkRange(String flag, Integer value, int min, int max) {
return null;
}
- /** Helper that hides the {@link InstrumentType#getAllRegisteredInstrumentTypes}
- * call from {@code validate()} so the import block stays minimal. */
- private static final class ActivationMethodAvailability {
- static int instCount() { return InstrumentType.getAllRegisteredInstrumentTypes().length; }
- }
-
/** Mutator used by {@code AminoAcidSet} when the parsed mod metadata
* changes the effective max-num-mods (the AA set is authoritative once
* loaded). Mirrors the legacy {@code ParamManager.setMaxNumMods}. */
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
index 1bcdda6c..81edd496 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
@@ -267,18 +267,10 @@ public int getMaxMSLevel() {
return maxMSLevel;
}
- public OutputFormat getOutputFormat() {
- return outputFormat;
- }
-
public boolean writeTsv() {
return outputFormat == OutputFormat.TSV;
}
- public boolean writePin() {
- return outputFormat == OutputFormat.PIN;
- }
-
/**
* Look for # in dataLine
* If present, remove that character and any comment after it
@@ -287,11 +279,7 @@ public boolean writePin() {
* @return dataLine without the comment
*/
public static String getConfigLineWithoutComment(String dataLine) {
- String[] tokenArray = dataLine.split("#");
- if (tokenArray.length == 0)
- return "";
-
- return tokenArray[0].trim();
+ return MSGFPlusOptions.stripComment(dataLine);
}
/**
@@ -316,7 +304,7 @@ public String parse(MSGFPlusOptions opts) {
String requiredErr = opts.validate();
if (requiredErr != null) return requiredErr;
- chargeCarrierMass = opts.effectiveChargeCarrierMass();
+ chargeCarrierMass = opts.chargeCarrierMass != null ? opts.chargeCarrierMass : 1.00727649;
Composition.setChargeCarrierMass(chargeCarrierMass);
// Read outputFormat up-front so the default-output-file extension logic
@@ -354,20 +342,20 @@ public String parse(MSGFPlusOptions opts) {
}
databaseFile = opts.databaseFile;
- decoyProteinPrefix = opts.effectiveDecoyPrefix();
+ decoyProteinPrefix = opts.decoyPrefix != null ? opts.decoyPrefix : "XXX";
- PrecursorTolerance tol = opts.effectivePrecursorTolerance();
+ PrecursorTolerance tol = opts.precursorTolerance != null ? opts.precursorTolerance : PrecursorTolerance.parse("20ppm");
leftPrecursorMassTolerance = tol.left;
rightPrecursorMassTolerance = tol.right;
- int toleranceUnit = opts.effectiveToleranceUnits();
+ int toleranceUnit = opts.precursorToleranceUnits != null ? opts.precursorToleranceUnits : 2;
if (toleranceUnit != 2) {
boolean isTolerancePPM = toleranceUnit != 0;
leftPrecursorMassTolerance = new Tolerance(leftPrecursorMassTolerance.getValue(), isTolerancePPM);
rightPrecursorMassTolerance = new Tolerance(rightPrecursorMassTolerance.getValue(), isTolerancePPM);
}
- IntRange isotope = opts.effectiveIsotopeErrorRange();
+ IntRange isotope = opts.isotopeErrorRange != null ? opts.isotopeErrorRange : new IntRange(0, 1);
this.minIsotopeError = isotope.min;
this.maxIsotopeError = isotope.max;
@@ -377,7 +365,7 @@ public String parse(MSGFPlusOptions opts) {
}
enzyme = opts.effectiveEnzyme();
- numTolerableTermini = opts.effectiveNumTolerableTermini();
+ numTolerableTermini = opts.numTolerableTermini != null ? opts.numTolerableTermini : 2;
activationMethod = opts.effectiveActivationMethod();
instType = opts.effectiveInstrumentType();
if (activationMethod == ActivationMethod.HCD
@@ -424,19 +412,19 @@ public String parse(MSGFPlusOptions opts) {
}
}
- numMatchesPerSpec = opts.effectiveNumMatchesPerSpec();
+ numMatchesPerSpec = opts.numMatchesPerSpec != null ? opts.numMatchesPerSpec : 1;
- IntRange specIdx = opts.effectiveSpecIndexRange();
+ IntRange specIdx = opts.specIndexRange != null ? opts.specIndexRange : new IntRange(1, Integer.MAX_VALUE - 1);
startSpecIndex = specIdx.min;
endSpecIndex = specIdx.max;
useTDA = opts.effectiveTdaStrategy() == 1;
- ignoreMetCleavage = opts.effectiveIgnoreMetCleavage() == 1;
- outputAdditionalFeatures = opts.effectiveAddFeatures() == 1;
+ ignoreMetCleavage = (opts.ignoreMetCleavage != null ? opts.ignoreMetCleavage : 0) == 1;
+ outputAdditionalFeatures = (opts.addFeatures != null ? opts.addFeatures : 0) == 1;
minPeptideLength = opts.effectiveMinPeptideLength();
maxPeptideLength = opts.effectiveMaxPeptideLength();
- maxNumVariantsPerPeptide = opts.effectiveNumIsoforms();
+ maxNumVariantsPerPeptide = opts.numIsoforms != null ? opts.numIsoforms : edu.ucsd.msjava.sequences.Constants.NUM_VARIANTS_PER_PEPTIDE;
if (minPeptideLength > maxPeptideLength) {
return "MinPepLength must not be larger than MaxPepLength";
@@ -448,27 +436,27 @@ public String parse(MSGFPlusOptions opts) {
return "MinCharge must not be larger than MaxCharge";
}
- numThreads = opts.effectiveNumThreads();
- numTasks = opts.effectiveNumTasks();
+ numThreads = opts.numThreads != null ? opts.numThreads : Runtime.getRuntime().availableProcessors();
+ numTasks = opts.numTasks != null ? opts.numTasks : 0;
minSpectraPerThread = opts.effectiveMinSpectraPerThread();
verbose = opts.effectiveVerbose() == 1;
- doNotUseEdgeScore = opts.effectiveEdgeScore() == 1;
+ doNotUseEdgeScore = (opts.edgeScore != null ? opts.edgeScore : 0) == 1;
dbIndexDir = opts.dbIndexDir;
- minNumPeaksPerSpectrum = opts.effectiveMinNumPeaks();
- minDeNovoScore = opts.effectiveMinDeNovoScore();
+ minNumPeaksPerSpectrum = opts.minNumPeaks != null ? opts.minNumPeaks : edu.ucsd.msjava.sequences.Constants.MIN_NUM_PEAKS_PER_SPECTRUM;
+ minDeNovoScore = opts.minDeNovoScore != null ? opts.minDeNovoScore : edu.ucsd.msjava.sequences.Constants.MIN_DE_NOVO_SCORE;
- maxMissedCleavages = opts.effectiveMaxMissedCleavages();
+ maxMissedCleavages = opts.maxMissedCleavages != null ? opts.maxMissedCleavages : -1;
if (maxMissedCleavages > -1 && enzyme.getName().equals("UnspecificCleavage")) {
return "Cannot specify a MaxMissedCleavages when using unspecific cleavage enzyme";
} else if (maxMissedCleavages > -1 && enzyme.getName().equals("NoCleavage")) {
return "Cannot specify a MaxMissedCleavages when using no cleavage enzyme";
}
- allowDenseCentroidedPeaks = opts.effectiveAllowDenseCentroidedPeaks() == 1;
- precursorCalMode = opts.effectivePrecursorCal();
+ allowDenseCentroidedPeaks = (opts.allowDenseCentroidedPeaks != null ? opts.allowDenseCentroidedPeaks : 0) == 1;
+ precursorCalMode = opts.precursorCalMode != null ? opts.precursorCalMode : PrecursorCalMode.AUTO;
- IntRange ms = opts.effectiveMSLevel();
+ IntRange ms = opts.msLevel != null ? opts.msLevel : new IntRange(2, 2);
minMSLevel = ms.min;
maxMSLevel = ms.max;
diff --git a/src/main/java/edu/ucsd/msjava/msgf/ToolLauncher.java b/src/main/java/edu/ucsd/msjava/msgf/ToolLauncher.java
deleted file mode 100644
index 01d57a74..00000000
--- a/src/main/java/edu/ucsd/msjava/msgf/ToolLauncher.java
+++ /dev/null
@@ -1,154 +0,0 @@
-package edu.ucsd.msjava.msgf;
-
-import edu.ucsd.msjava.msscorer.NewAdditiveScorer;
-import edu.ucsd.msjava.msutil.AminoAcidSet;
-import edu.ucsd.msjava.msutil.Spectrum;
-
-import java.io.BufferedOutputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.util.Iterator;
-
-public abstract class ToolLauncher {
- // Essential parameters, set by the constructor
- protected final Iterator specIterator;
- protected final NewAdditiveScorer scorer;
-
- // Optional parameters set by builders.
-
- protected float specProb = 1e-9f;
-
- protected boolean trypticOnly = true;
-
- // Tolerance
- protected Tolerance pmTolerance = new Tolerance(30, true);
- protected Tolerance fragTolerance = new Tolerance(30, true);
-
- protected float minParentMass = 400;
- protected float maxParentMass = 2000;
- protected int msgfScoreThreshold = 0;
-
- // Amino acid set, default: standard + Carbamidomethyl C
- protected AminoAcidSet aaSet;
-
- // output
- protected PrintStream out;
-
- /**
- * A constructor specifies spectral file name and database file name. Database must be "fasta" format.
- *
- * @param specIterator spectra iterator.
- * @param scorer a scorer object.
- */
- protected ToolLauncher(Iterator specIterator, NewAdditiveScorer scorer) {
- this.specIterator = specIterator;
- this.scorer = scorer;
- this.out = System.out;
- this.aaSet = AminoAcidSet.getStandardAminoAcidSetWithFixedCarbamidomethylatedCys();
- }
-
- /**
- * A builder method to set spectral probability.
- *
- * @param specProb spectral probability
- * @return this object.
- */
- public ToolLauncher specProb(float specProb) {
- this.specProb = specProb;
- return this;
- }
-
- /**
- * If this method is called, non-tryptic peptides are generated.
- * Otherwise, only peptides ends with 'K' or 'R' are generated.
- *
- * @return this object.
- */
- public ToolLauncher allowNonTryptic() {
- this.trypticOnly = false;
- return this;
- }
-
-
- /**
- * Set parent mass tolerance.
- *
- * @param tolerance tolerance.
- * @return this object.
- */
- public ToolLauncher pmTolerance(Tolerance pmTolerance) {
- this.pmTolerance = pmTolerance;
- return this;
- }
-
- /**
- * Set fragment mass tolerance.
- *
- * @param tolerance tolerance.
- * @return this object.
- */
- public ToolLauncher fragTolerance(Tolerance fragTolerance) {
- this.fragTolerance = fragTolerance;
- return this;
- }
-
- /**
- * Set minimum parent mass.
- *
- * @param minParentMass minimum parent mass.
- * @return this object.
- */
- public ToolLauncher minParentMass(float minParentMass) {
- this.minParentMass = minParentMass;
- return this;
- }
-
- /**
- * Set maximum parent mass.
- *
- * @param maxParentMass maximum parent mass.
- * @return this object.
- */
- public ToolLauncher maxParentMass(float maxParentMass) {
- this.maxParentMass = maxParentMass;
- return this;
- }
-
- /**
- * Set max MSGF score threshold. Ignore all spectra whose best de novo scores are below thresholdScore.
- *
- * @param thresholdScore max MS-GF score threshold.
- * @return this object.
- */
- public ToolLauncher msgfScoreThreshold(int thresholdScore) {
- this.msgfScoreThreshold = thresholdScore;
- return this;
- }
-
- /**
- * Set the amino acid set.
- *
- * @param aaSet amino acid set.
- * @return this object.
- */
- public ToolLauncher aminoAcidSet(AminoAcidSet aaSet) {
- this.aaSet = aaSet;
- return this;
- }
-
- /**
- * Set the output.
- *
- * @param outputFileName output file name.
- * @return this object.
- */
- public ToolLauncher outputFileName(String outputFileName) {
- try {
- out = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFileName)));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- }
- return this;
- }
-}
diff --git a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java b/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java
deleted file mode 100644
index 62fee4b4..00000000
--- a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGenerator.java
+++ /dev/null
@@ -1,733 +0,0 @@
-package edu.ucsd.msjava.msscorer;
-
-import edu.ucsd.msjava.msgf.Histogram;
-import edu.ucsd.msjava.msgf.NominalMass;
-import edu.ucsd.msjava.msgf.Tolerance;
-import edu.ucsd.msjava.msscorer.NewScorerFactory.SpecDataType;
-import edu.ucsd.msjava.msutil.*;
-import edu.ucsd.msjava.mgf.MgfSpectrumParser;
-
-import java.io.File;
-import java.util.*;
-
-/**
- * This only supports low accuracy fragment ions.
- *
- * @author sangtaekim
- */
-public class ScoringParameterGenerator extends NewRankScorer {
- private static final float MIN_OFFSET_MASS = -120; // for ion types
- private static final float MAX_OFFSET_MASS = 38;
- private static final float MIN_PRECURSOR_OFFSET = -300; // for precursors
- private static final float MAX_PRECURSOR_OFFSET = 30;
- private static final int MIN_NUM_SPECTRA_PER_PARTITION = 400; // 400
- private static final int MIN_NUM_SPECTRA_FOR_PRECURSOR_OFF = 150;
-
- private static final float MIN_PRECURSOR_OFFSET_PROBABILITY = 0.15f; // 0.15
- private static final float MIN_ION_OFFSET_PROBABILITY = 0.15f; // 0.15, for ion types
- private static final int MAX_RANK = 150;
- private static final int NUM_SEGMENTS_PER_SPECTRUM = 2; // 2
-
-
- private static final int[] smoothingRanks = {3, 5, 10, 20, 50, Integer.MAX_VALUE}; //Ranks around which smoothing occurs
- private static final int[] smoothingWindowSize = {0, 1, 2, 3, 4, 5}; //Smoothing windows for each smoothing rank
-
- private static final int NUM_NOISE_IONS = 10;
- protected static final int MAX_CHARGE = 20;
-
- public static void main(String argv[]) {
- File specFile = null;
- File outputFile = null;
- boolean isText = false;
- AminoAcidSet aaSet = AminoAcidSet.getStandardAminoAcidSetWithFixedCarbamidomethylatedCys();
- int numSpecsPerPeptide = 1;
- int errorScalingFactor = 10;
-
- // Fragmentation method
- ActivationMethod activationMethod = null;
- InstrumentType instType = null;
- Enzyme enzyme = null;
-
- for (int i = 0; i < argv.length; i += 2) {
- if (!argv[i].startsWith("-") || i + 1 >= argv.length)
- printUsageAndExit("Invalid parameter!");
- if (argv[i].equalsIgnoreCase("-i")) {
- specFile = new File(argv[i + 1]);
- if (!specFile.exists()) {
- printUsageAndExit(argv[i + 1] + " doesn't exist.");
- }
- int posDot = specFile.getName().lastIndexOf('.');
- if (posDot >= 0) {
- String extension = specFile.getName().substring(posDot);
- if (!extension.equalsIgnoreCase(".mgf"))
- printUsageAndExit("Invalid spectrum format: " + argv[i + 1]);
- } else
- printUsageAndExit("Invalid spectrum format: " + argv[i + 1]);
- } else if (argv[i].equalsIgnoreCase("-o")) {
- outputFile = new File(argv[i + 1]);
- } else if (argv[i].equalsIgnoreCase("-t")) {
- outputFile = new File(argv[i + 1]);
- isText = true;
- } else if (argv[i].equalsIgnoreCase("-fixMod")) {
- // 0: No mod, 1: Carbamidomethyl C, 2: Carboxymethyl C
- if (argv[i + 1].equalsIgnoreCase("0"))
- aaSet = AminoAcidSet.getStandardAminoAcidSet();
- else if (argv[i + 1].equalsIgnoreCase("1"))
- aaSet = AminoAcidSet.getStandardAminoAcidSetWithFixedCarbamidomethylatedCys();
- else if (argv[i + 1].equalsIgnoreCase("2"))
- aaSet = AminoAcidSet.getStandardAminoAcidSetWithFixedCarboxymethylatedCys();
- else
- printUsageAndExit("Invalid -fixMod parameter: " + argv[i + 1]);
- } else if (argv[i].equalsIgnoreCase("-pep")) {
- numSpecsPerPeptide = Integer.parseInt(argv[i + 1]);
- } else if (argv[i].equalsIgnoreCase("-err")) {
- errorScalingFactor = Integer.parseInt(argv[i + 1]);
- } else if (argv[i].equalsIgnoreCase("-m")) // Fragmentation method
- {
- // (0: written in the spectrum, 1: CID , 2: ETD, 3: HCD, 4: UVPD)
- if (argv[i + 1].equalsIgnoreCase("1")) {
- activationMethod = ActivationMethod.CID;
- } else if (argv[i + 1].equalsIgnoreCase("2")) {
- activationMethod = ActivationMethod.ETD;
- } else if (argv[i + 1].equalsIgnoreCase("3")) {
- activationMethod = ActivationMethod.HCD;
- } else if (argv[i + 1].equalsIgnoreCase("4")) {
- activationMethod = ActivationMethod.UVPD;
- } else {
- printUsageAndExit("Invalid activation method: " + argv[i + 1]);
- }
- } else if (argv[i].equalsIgnoreCase("-inst")) // Instrument type
- {
- if (argv[i + 1].equalsIgnoreCase("0")) {
- instType = InstrumentType.LOW_RESOLUTION_LTQ;
- } else if (argv[i + 1].equalsIgnoreCase("1")) {
- instType = InstrumentType.TOF;
- } else if (argv[i + 1].equalsIgnoreCase("2")) {
- instType = InstrumentType.HIGH_RESOLUTION_LTQ;
- } else {
- printUsageAndExit("Invalid instrument type: " + argv[i + 1]);
- }
- } else if (argv[i].equalsIgnoreCase("-e")) // Enzyme
- {
- // 0: No enzyme, 1: Trypsin, 2: Chymotrypsin, 3: LysC, 4: LysN, 5: GluC, 6: ArgC, 7: AspN
- if (argv[i + 1].equalsIgnoreCase("0"))
- enzyme = null;
- else if (argv[i + 1].equalsIgnoreCase("1"))
- enzyme = Enzyme.TRYPSIN;
- else if (argv[i + 1].equalsIgnoreCase("2"))
- enzyme = Enzyme.CHYMOTRYPSIN;
- else if (argv[i + 1].equalsIgnoreCase("3"))
- enzyme = Enzyme.LysC;
- else if (argv[i + 1].equalsIgnoreCase("4"))
- enzyme = Enzyme.LysN;
- else if (argv[i + 1].equalsIgnoreCase("5"))
- enzyme = Enzyme.GluC;
- else if (argv[i + 1].equalsIgnoreCase("6"))
- enzyme = Enzyme.ArgC;
- else if (argv[i + 1].equalsIgnoreCase("7"))
- enzyme = Enzyme.AspN;
- else
- printUsageAndExit("Invalid enzyme: " + argv[i + 1]);
- } else
- printUsageAndExit("Invalid parameters!");
- }
- if (specFile == null)
- printUsageAndExit("missing annotatedMgfFileName!");
- if (outputFile == null)
- printUsageAndExit("missing outputFileName!");
- if (activationMethod == null)
- printUsageAndExit("missing activationMethod!");
- if (instType == null)
- printUsageAndExit("missing instrumentType!");
-
- generateParameters(specFile, activationMethod, instType, enzyme, Protocol.AUTOMATIC, numSpecsPerPeptide, errorScalingFactor, outputFile, aaSet, isText, false);
- }
-
- public static void printUsageAndExit(String message) {
- System.err.println(message);
- System.out.println("usage: java -Xmx2000M -cp MSGF.jar msscorer.ScoringParameterGenerator\n" +
- "\t-i annotatedMgfFileName (*.mgf)\n" +
- "\t-o outputFileName (e.g. CID_Tryp.param)\n" +
- "\t-m FragmentationMethodID (1: CID, 2: ETD, 3: HCD, 4: UVPD)\n" +
- "\t-inst InstrumentID (0: Low-res LCQ/LTQ, 1: TOF , 2: High-res LTQ)\n" +
- "\t-e EnzymeID (0: No enzyme, 1: Trypsin (Default), 2: Chymotrypsin, 3: Lys-C, 4: Lys-N, 5: Glu-C, 6: Arg-C, 7: Asp-N)\n" +
- "\t[-fixMod 0/1/2] (0: NoCysteineProtection, 1: CarbamidomethyC (default), 2: CarboxymethylC)\n" +
- "\t[-pep numPeptidesPerSpec] (default: 1)\n" +
- "\t[-err errorScalingFactor] (default: 10)"
- );
- System.exit(0);
- }
-
- public static void generateParameters(
- File specFile,
- ActivationMethod activationMethod,
- InstrumentType instType,
- Enzyme enzyme,
- Protocol protocol,
- int numSpecsPerPeptide,
- int errorScalingFactor,
- File outputFile,
- AminoAcidSet aaSet,
- boolean isText,
- boolean verbose) {
- SpectraContainer container = new SpectraContainer(specFile.getPath(), new MgfSpectrumParser().aaSet(aaSet));
-
- // multiple spectra with the same peptide -> one spec per peptide
- HashMap> pepSpecMap = new HashMap>();
- SpectraContainer specContOnePerPep = new SpectraContainer();
- for (Spectrum spec : container) {
- String pep = spec.getAnnotationStr() + ":" + spec.getCharge();
- if (pep != null && pep.length() > 0) {
- ArrayList specList = pepSpecMap.get(pep);
- if (specList == null) {
- specList = new ArrayList();
- pepSpecMap.put(pep, specList);
- }
- if (specList.size() < numSpecsPerPeptide)
- specList.add(spec);
- }
- }
- for (ArrayList specList : pepSpecMap.values())
- for (Spectrum spec : specList)
- specContOnePerPep.add(spec);
-
- SpecDataType dataType = new SpecDataType(activationMethod, instType, enzyme, protocol);
- ScoringParameterGenerator gen = new ScoringParameterGenerator(specContOnePerPep, dataType);
-
- // set up the tolerance
- gen.tolerance(new Tolerance(1 / Constants.INTEGER_MASS_SCALER / 2));
-
- // Step 1: partition spectra
- gen.partition(NUM_SEGMENTS_PER_SPECTRUM);
- if (verbose)
- System.out.println("Partition: " + gen.partitionSet.size());
-
- // Step 2: compute offset frequency functions of precursor peaks and their neutral losses
- gen.precursorOFF(MIN_PRECURSOR_OFFSET_PROBABILITY);
- if (verbose)
- System.out.println("PrecursorOFF Done.");
-
- // Step 3: filter out "significant" precursor offsets
- gen.filterPrecursorPeaks();
- if (verbose)
- System.out.println("Filtering Done.");
-
- // Step 4: compute offset frequency fnction of fragment peaks and determine ion types to be considered for scoring
- gen.selectIonTypes(MIN_ION_OFFSET_PROBABILITY);
- if (verbose)
- System.out.println("Ion types selected.");
-
- // Step 5: compute rank distributions
- gen.generateRankDist(MAX_RANK);
- if (verbose)
- System.out.println("Rank distribution computed.");
-
- // Step 6 (optional): generate error distribution, currently not in use
-
- // Step 7: smoothing parameters
- gen.smoothing();
- if (verbose)
- System.out.println("Smoothing complete.");
-
- // output
- if (!isText)
- gen.writeParameters(outputFile);
- else
- gen.writeParametersPlainText(outputFile);
-
- if (verbose)
- System.out.println("Writing Done.");
- }
-
- // Required
- private SpectraContainer specContainer;
-
- public ScoringParameterGenerator(SpectraContainer specContainer, SpecDataType dataType) {
- this.specContainer = specContainer;
- super.dataType = dataType;
- }
-
- public void partition(int numSegments) {
- super.numSegments = numSegments;
- chargeHist = new Histogram();
- partitionSet = new TreeSet();
-
- HashMap> parentMassMap = new HashMap>();
- for (Spectrum spec : specContainer) {
- int charge = spec.getCharge();
- if (charge <= 0)
- continue;
- chargeHist.add(charge);
- if (spec.getAnnotation() != null) {
- ArrayList precursorList = parentMassMap.get(charge);
- if (precursorList == null) {
- precursorList = new ArrayList();
- parentMassMap.put(charge, precursorList);
- }
- precursorList.add(spec.getPrecursorMass());
- }
- }
-
- for (int c = chargeHist.minKey(); c <= chargeHist.maxKey(); c++) {
- ArrayList parentMassList = parentMassMap.get(c);
- if (parentMassList == null)
- continue;
-
- int numSpec = parentMassList.size();
- if (numSpec < Math.round(MIN_NUM_SPECTRA_PER_PARTITION * 0.9f)) // to few spectra
- continue;
-
- Collections.sort(parentMassList);
- int bestSetSize = 0;
- int smallestRemainder = MIN_NUM_SPECTRA_PER_PARTITION;
- for (int i = Math.round(MIN_NUM_SPECTRA_PER_PARTITION * 0.9f); i <= Math.round(MIN_NUM_SPECTRA_PER_PARTITION * 1.1f); i++) {
- int remainder = numSpec % i;
- if (i - remainder < remainder)
- remainder = i - remainder;
- if (remainder < smallestRemainder || (remainder == smallestRemainder && Math.abs(MIN_NUM_SPECTRA_PER_PARTITION - i) < Math.abs(MIN_NUM_SPECTRA_PER_PARTITION - bestSetSize))) {
- bestSetSize = i;
- smallestRemainder = remainder;
- }
- }
- int num = 0;
- for (int i = 0; i == 0 || i < Math.round(numSpec / (float) bestSetSize); i++) {
- if (num != 0) {
- for (int seg = 0; seg < numSegments; seg++)
- partitionSet.add(new Partition(c, parentMassList.get(num), seg));
- } else {
- for (int seg = 0; seg < numSegments; seg++)
- partitionSet.add(new Partition(c, 0f, seg));
- }
- num += bestSetSize;
- }
- }
- }
-
- private void precursorOFF(float minProbThreshold) {
- if (chargeHist == null) {
- assert (false) : "partition() must have been called before";
- return;
- }
- precursorOFFMap = new TreeMap>();
- numPrecurOFF = 0;
-
- for (int charge = chargeHist.minKey(); charge <= chargeHist.maxKey(); charge++) {
- if (chargeHist.get(charge) < MIN_NUM_SPECTRA_FOR_PRECURSOR_OFF)
- continue;
- ArrayList precursorOffsetList = new ArrayList();
- int numSpecs = 0;
- HashMap> histList = new HashMap>();
- for (int c = charge; c >= 2; c--)
- histList.put(c, new Histogram());
-
- for (Spectrum spec : specContainer) {
- if (spec.getAnnotation() == null)
- continue;
- if (spec.getCharge() != charge)
- continue;
- numSpecs++;
- spec = filter.apply(spec);
- float precursorNeutralMass = spec.getPrecursorMass();
- for (int c = charge; c >= 2; c--) {
- float precursorMz = (precursorNeutralMass + c * (float) Composition.ChargeCarrierMass()) / c;
- ArrayList peakList = spec.getPeakListByMassRange(
- precursorMz + MIN_PRECURSOR_OFFSET / (float) c - mme.getToleranceAsDa(precursorMz + MIN_PRECURSOR_OFFSET / (float) c) / 2,
- precursorMz + MAX_PRECURSOR_OFFSET / (float) c + mme.getToleranceAsDa(precursorMz + MAX_PRECURSOR_OFFSET / (float) c) / 2);
-
- int prevMassIndexDiff = Integer.MIN_VALUE;
- for (Peak p : peakList) {
- float peakMass = p.getMz();
- int massIndexDiff = NominalMass.toNominalMass(peakMass - precursorMz);
- if (massIndexDiff > prevMassIndexDiff) {
- histList.get(c).add(massIndexDiff);
- prevMassIndexDiff = massIndexDiff;
- }
- }
- }
- }
-
- for (int c = charge; c >= 2; c--) {
- ArrayList keyList = new ArrayList(histList.get(c).keySet());
- Collections.sort(keyList);
- for (Integer key : keyList) {
- float prob = (histList.get(c).get(key)) / (float) numSpecs;
- if (prob > minProbThreshold) {
- precursorOffsetList.add(new PrecursorOffsetFrequency((charge - c), NominalMass.getMassFromNominalMass(key), prob));
- }
- }
- }
- precursorOFFMap.put(charge, precursorOffsetList);
- numPrecurOFF += precursorOffsetList.size();
- }
- }
-
- private void filterPrecursorPeaks() {
- if (this.precursorOFFMap == null)
- return;
- for (Spectrum spec : specContainer) {
- for (PrecursorOffsetFrequency off : this.getPrecursorOFF(spec.getCharge()))
- spec.filterPrecursorPeaks(mme, off.getReducedCharge(), off.getOffset());
- }
- }
-
- private Pair getPrecursorMassRange(Partition partition) {
- float minParentMass = partition.getParentMass();
- float maxParentMass = Float.MAX_VALUE;
- Partition higherPartition = partitionSet.higher(partition);
- if (higherPartition != null) {
- if (higherPartition.getCharge() == partition.getCharge() && higherPartition.getSegNum() == partition.getSegNum()) {
- maxParentMass = higherPartition.getParentMass();
- }
- }
- return new Pair(minParentMass, maxParentMass);
- }
-
- private void selectIonTypes(float minProbThreshold) {
- if (partitionSet == null) {
- assert (false) : "partition() must have been called before!";
- return;
- }
-
- fragOFFTable = new HashMap>();
- insignificantFragOFFTable = new HashMap>();
-
- for (Partition partition : partitionSet) {
- int charge = partition.getCharge();
- // parent mass range check
- Pair parentMassRange = getPrecursorMassRange(partition);
- int seg = partition.getSegNum();
-
- int numSpec = 0;
- HashMap> prefixIonFreq = new HashMap>();
- HashMap> suffixIonFreq = new HashMap>();
- for (int c = 1; c <= charge; c++) {
- prefixIonFreq.put(c, new Histogram());
- suffixIonFreq.put(c, new Histogram());
- }
-
- int numCleavages = 0;
- for (Spectrum spec : specContainer) {
- if (spec.getAnnotation() == null)
- continue;
- if (spec.getCharge() != charge)
- continue;
-
- float curParentMass = spec.getPrecursorMass();
- if (curParentMass < parentMassRange.getFirst() || curParentMass >= parentMassRange.getSecond())
- continue;
-
- Peptide annotation = spec.getAnnotation();
- numCleavages += annotation.size() - 1;
- numSpec++;
- spec = filter.apply(spec);
-
- for (int c = 1; c <= charge; c++) {
- for (int direction = 0; direction < 2; direction++) {
- double accurateMass = 0;
- HashMap> ionFreq = null;
- for (int i = 0; i < annotation.size() - 1; i++) {
- if (direction == 0) {
- accurateMass += annotation.get(i).getAccurateMass();
- ionFreq = prefixIonFreq;
- } else if (direction == 1) {
- accurateMass += annotation.get(annotation.size() - 1 - i).getAccurateMass();
- ionFreq = suffixIonFreq;
- }
- float mass = (float) (accurateMass / c);
- ArrayList peakList = spec.getPeakListByMassRange(
- mass + MIN_OFFSET_MASS / (float) c - mme.getToleranceAsDa(mass),
- mass + MAX_OFFSET_MASS / (float) c + mme.getToleranceAsDa(mass));
- int prevIntOffset = Integer.MIN_VALUE;
- for (Peak p : peakList) {
- float peakMz = p.getMz();
- int segNum = getSegmentNum(peakMz, curParentMass);
- if (segNum != seg)
- continue;
- float offset = peakMz - mass;
- int intOffset = NominalMass.toNominalMass(offset);
- if (intOffset > prevIntOffset) {
- ionFreq.get(c).add(intOffset);
- prevIntOffset = intOffset;
- }
- }
- }
- }
- }
- }
-
- float maxProb = 0;
- int maxCharge = 0;
- int maxDirection = 0;
- float maxOffset = 0;
-
- ArrayList fragmentOffsetFrequencyList = new ArrayList();
- ArrayList insignificantFragmentOffsetFrequencyList = new ArrayList();
- for (int c = 1; c <= charge; c++) {
- for (int direction = 0; direction < 2; direction++) {
- ArrayList keyList;
- if (direction == 0)
- keyList = new ArrayList(prefixIonFreq.get(c).keySet());
- else
- keyList = new ArrayList(suffixIonFreq.get(c).keySet());
-
- Collections.sort(keyList);
- for (Integer key : keyList) {
- float offset = NominalMass.getMassFromNominalMass(key);
- int freq;
- if (direction == 0)
- freq = prefixIonFreq.get(c).get(key);
- else
- freq = suffixIonFreq.get(c).get(key);
- float prob = freq / (float) numCleavages * numSegments;
- if (prob > maxProb) {
- maxProb = prob;
- maxCharge = c;
- maxDirection = direction;
- maxOffset = offset;
- }
- if (prob > minProbThreshold) {
- if (direction == 0)
- fragmentOffsetFrequencyList.add(new FragmentOffsetFrequency(new IonType.PrefixIon(c, offset), prob));
- else
- fragmentOffsetFrequencyList.add(new FragmentOffsetFrequency(new IonType.SuffixIon(c, offset), prob));
- } else {
- if (direction == 0)
- insignificantFragmentOffsetFrequencyList.add(new FragmentOffsetFrequency(new IonType.PrefixIon(c, offset), prob));
- else
- insignificantFragmentOffsetFrequencyList.add(new FragmentOffsetFrequency(new IonType.SuffixIon(c, offset), prob));
- }
- }
- }
- }
-
- if (fragmentOffsetFrequencyList.size() == 0) {
- if (maxDirection == 0)
- fragmentOffsetFrequencyList.add(new FragmentOffsetFrequency(new IonType.PrefixIon(maxCharge, maxOffset), maxProb));
- else
- fragmentOffsetFrequencyList.add(new FragmentOffsetFrequency(new IonType.SuffixIon(maxCharge, maxOffset), maxProb));
- }
-
- Collections.sort(insignificantFragmentOffsetFrequencyList);
- ArrayList noiseOffsetFrequencyList = new ArrayList(NUM_NOISE_IONS);
-
- int numNoise = 0;
- for (FragmentOffsetFrequency off : insignificantFragmentOffsetFrequencyList) {
- if (off.getIonType().getCharge() == 1)
- noiseOffsetFrequencyList.add(off);
- if (++numNoise >= NUM_NOISE_IONS)
- break;
- }
- Collections.sort(fragmentOffsetFrequencyList, Collections.reverseOrder());
- fragOFFTable.put(partition, fragmentOffsetFrequencyList);
- insignificantFragOFFTable.put(partition, noiseOffsetFrequencyList);
- }
- }
-
- private void generateRankDist(int maxRank) {
- if (partitionSet == null) {
- assert (false) : "partition() must have been called!";
- return;
- }
-
- rankDistTable = new HashMap>();
- this.maxRank = maxRank;
-
- for (Partition partition : partitionSet) {
- int charge = partition.getCharge();
- IonType[] ionTypes = getIonTypes(partition);
- if (ionTypes == null || ionTypes.length == 0)
- continue;
- Pair parentMassRange = getPrecursorMassRange(partition);
- int seg = partition.getSegNum();
-
- int numSpec = 0;
- HashMap> rankDist = new HashMap>();
- HashMap rankDistMaxRank = new HashMap();
- HashMap rankDistUnexplained = new HashMap();
-
- for (IonType ion : ionTypes) {
- rankDist.put(ion, new Histogram());
- rankDistMaxRank.put(ion, 0f);
- rankDistUnexplained.put(ion, 0f);
- }
- rankDist.put(IonType.NOISE, new Histogram());
-
- float[] noiseDist = new float[maxRank + 2];
- int numMaxRankPeaks = 0;
- int totalCleavageSites = 0;
-
- for (Spectrum spec : specContainer) {
- int numExplainedPeaks = 0;
- if (spec.getAnnotation() == null)
- continue;
- if (spec.getCharge() != charge)
- continue;
- float curParentMass = spec.getPrecursorMass();
- if (curParentMass < parentMassRange.getFirst() || curParentMass >= parentMassRange.getSecond())
- continue;
-
- Peptide annotation = spec.getAnnotation();
- spec.setRanksOfPeaks();
- numSpec++;
- numMaxRankPeaks += spec.size() - maxRank + 1;
- totalCleavageSites += annotation.size() - 1;
- int prmMassIndex = 0;
- int srmMassIndex = 0;
-
- HashSet explainedPeakSet = new HashSet();
- HashMap numExplainedMaxRankPeaks = new HashMap();
- for (IonType ion : ionTypes) {
- numExplainedMaxRankPeaks.put(ion, 0);
- }
-
- int numSignalBinsAtThisSegment = 0;
- for (int i = 0; i < annotation.size() - 1; i++) {
- prmMassIndex += NominalMass.toNominalMass(annotation.get(i).getMass());
- srmMassIndex += NominalMass.toNominalMass(annotation.get(annotation.size() - 1 - i).getMass());
-
- float prm = NominalMass.getMassFromNominalMass(prmMassIndex);
- float srm = NominalMass.getMassFromNominalMass(srmMassIndex);
- for (IonType ion : ionTypes) {
- float theoMass;
- if (ion instanceof IonType.PrefixIon)
- theoMass = ion.getMz(prm);
- else
- theoMass = ion.getMz(srm);
-
- int segNum = super.getSegmentNum(theoMass, curParentMass);
- if (segNum == seg) {
- numSignalBinsAtThisSegment++;
- Peak p = spec.getPeakByMass(theoMass, mme);
- if (p != null) {
- numExplainedPeaks++;
- int rank = p.getRank();
- if (rank >= maxRank) {
- rank = maxRank;
- numExplainedMaxRankPeaks.put(ion, numExplainedMaxRankPeaks.get(ion) + 1);
- }
- explainedPeakSet.add(p);
- rankDist.get(ion).add(rank);
- } else {
- rankDist.get(ion).add(maxRank + 1); // maxRank+1: missing ion
- }
- }
- }
- }
-
- ArrayList unexplainedPeaksAtThisSegment = new ArrayList();
- int numPeaksAtThisSegment = 0;
- int numMaxRankPeaksAtThisSegment = 0;
- for (Peak p : spec) {
- if (super.getSegmentNum(p.getMz(), curParentMass) == seg) {
- numPeaksAtThisSegment++;
- if (p.getRank() >= maxRank)
- numMaxRankPeaksAtThisSegment++;
- if (!explainedPeakSet.contains(p))
- unexplainedPeaksAtThisSegment.add(p);
- }
- }
-
- float midMassThisSegment = (1f / numSegments * seg + 1f / numSegments / 2) * annotation.getParentMass();
- float numBinsAtThisSegment = annotation.getParentMass() / numSegments / mme.getToleranceAsDa(midMassThisSegment) / 2;
-
- for (Peak p : unexplainedPeaksAtThisSegment) {
- int rank = p.getRank();
-// float noiseFreq = (float)(annotation.size()-1)/(annotation.getParentMass()/(mme.getToleranceAsDa(midMassThisSegment)*2));
- float noiseFreq = (annotation.size() - 1) / numSegments / numBinsAtThisSegment;
- if (rank >= maxRank)
- noiseDist[maxRank] += noiseFreq / numMaxRankPeaksAtThisSegment;
- else
- noiseDist[rank] += noiseFreq;
- }
-
- for (IonType ion : ionTypes) {
- if (numMaxRankPeaksAtThisSegment > 0) {
- Float prevSumFreq = rankDistMaxRank.get(ion);
- float curFreq = numExplainedMaxRankPeaks.get(ion) / (float) numMaxRankPeaksAtThisSegment;
- rankDistMaxRank.put(ion, prevSumFreq + curFreq);
- }
- }
-
- noiseDist[maxRank + 1] += (numBinsAtThisSegment - numPeaksAtThisSegment) * (annotation.size() - 1) / numSegments / numBinsAtThisSegment;
- }
-
- HashMap freqDist = new HashMap();
- for (IonType ion : ionTypes) {
- Float[] dist = new Float[maxRank + 1];
- Histogram hist = rankDist.get(ion);
- for (int i = 1; i <= maxRank - 1; i++) {
- Integer num = hist.get(i);
- dist[i - 1] = (num / (float) numSpec);
- }
- dist[maxRank - 1] = rankDistMaxRank.get(ion) / numSpec;
- dist[maxRank] = hist.get(maxRank + 1) / (float) numSpec;
- freqDist.put(ion, dist);
- }
-
- // noise
- Float[] dist = new Float[maxRank + 1];
- for (int i = 1; i <= maxRank + 1; i++)
- dist[i - 1] = noiseDist[i] / numSpec;
- freqDist.put(IonType.NOISE, dist);
-
- rankDistTable.put(partition, freqDist);
- }
- }
-
- protected void smoothing() {
- smoothingRankDistTable();
- }
-
- protected void smoothingRankDistTable() {
- if (rankDistTable == null)
- return;
- assert (smoothingRanks.length == smoothingWindowSize.length);
- for (Partition partition : rankDistTable.keySet()) {
- HashMap table = this.rankDistTable.get(partition);
- for (IonType ion : table.keySet()) {
- Float[] freq = table.get(ion);
- Float[] smoothedFreq = new Float[freq.length];
- int smoothingIndex = 0;
- for (int i = 0; i < freq.length - 2; i++) // last 2 columns: maxRank, unexplained
- {
- if (smoothingIndex < smoothingRanks.length - 1 &&
- i == smoothingRanks[smoothingIndex])
- smoothingIndex++;
- int windowSize = smoothingWindowSize[smoothingIndex];
- float sumFrequencies = 0;
- int numIndicesSummed = 0;
- for (int d = -windowSize; d <= windowSize; d++) {
- int index = i + d;
- if (index < 0 || index > freq.length - 3)
- continue;
- sumFrequencies += freq[index];
- numIndicesSummed++;
- }
- while (sumFrequencies == 0 && windowSize < freq.length - 4) {
- windowSize++;
- int index = i - windowSize;
- if (index >= 0) {
- sumFrequencies += freq[index];
- numIndicesSummed++;
- }
- index = i + windowSize;
- if (index <= freq.length - 3) {
- sumFrequencies += freq[index];
- numIndicesSummed++;
- }
- }
- if (sumFrequencies != 0)
- smoothedFreq[i] = sumFrequencies / numIndicesSummed;
- else
- assert (false);
- }
- for (int i = 0; i < freq.length - 2; i++)
- freq[i] = smoothedFreq[i];
- if (freq[freq.length - 1] == 0)
- freq[freq.length - 1] = Float.MIN_VALUE;
- if (freq[freq.length - 2] == 0)
- freq[freq.length - 2] = freq[freq.length - 3];
- }
- }
- }
-}
\ No newline at end of file
diff --git a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java b/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java
deleted file mode 100644
index 8cedf8e6..00000000
--- a/src/main/java/edu/ucsd/msjava/msscorer/ScoringParameterGeneratorWithErrors.java
+++ /dev/null
@@ -1,880 +0,0 @@
-package edu.ucsd.msjava.msscorer;
-
-import edu.ucsd.msjava.msgf.Histogram;
-import edu.ucsd.msjava.msgf.IntHistogram;
-import edu.ucsd.msjava.msgf.NominalMass;
-import edu.ucsd.msjava.msgf.Tolerance;
-import edu.ucsd.msjava.msscorer.NewScorerFactory.SpecDataType;
-import edu.ucsd.msjava.msutil.*;
-import edu.ucsd.msjava.msutil.IonType.PrefixIon;
-import edu.ucsd.msjava.mgf.MgfSpectrumParser;
-
-import java.io.File;
-import java.util.*;
-
-/**
- * This only supports low accuracy fragment ions.
- *
- * @author sangtaekim
- */
-public class ScoringParameterGeneratorWithErrors extends NewRankScorer {
- private static final float MIN_PRECURSOR_OFFSET = -300; // for precursors
- private static final float MAX_PRECURSOR_OFFSET = 30;
- private static final int MIN_NUM_SPECTRA_PER_PARTITION = 400; // 400
- private static final int MIN_NUM_SPECTRA_FOR_PRECURSOR_OFF = 150;
- private static final int MAX_NUM_PARTITIONS_PER_CHARGE = 30; // 30
-
- private static final float MIN_PRECURSOR_OFFSET_PROBABILITY = 0.15f; // 0.15
- private static final float MIN_ION_OFFSET_PROBABILITY = 0.15f; // 0.15, for ion types
- private static final float MIN_MAIN_ION_OFFSET_PROBABILITY = 0.01f; // ions with probabilities below this number will be ignored
-
- private static final int MAX_RANK = 150;
- private static final int NUM_SEGMENTS_PER_SPECTRUM = 2; // 2
-
- private static final int[] smoothingRanks = {3, 5, 10, 20, 50, Integer.MAX_VALUE}; //Ranks around which smoothing occurs
- private static final int[] smoothingWindowSize = {0, 1, 2, 3, 4, 5}; //Smoothing windows for each smoothing rank
-
- private static final float DECONVOLUTION_MASS_TOLERANCE = 0.02f;
- protected static final int MAX_CHARGE = 20;
-
- public static void generateParameters(
- File specFile,
- SpecDataType dataType,
- AminoAcidSet aaSet,
- File outputDir,
- boolean isText,
- boolean verbose,
- boolean singlePartition
- ) {
- SpectraContainer container = new SpectraContainer(specFile.getPath(), new MgfSpectrumParser().aaSet(aaSet));
- generateParameters(container, dataType, aaSet, outputDir, isText, verbose, singlePartition);
- }
-
- public static void generateParameters(
- SpectraContainer container,
- SpecDataType dataType,
- AminoAcidSet aaSet,
- File outputDir,
- boolean isText,
- boolean verbose) {
- generateParameters(container, dataType, aaSet, outputDir, isText, verbose, false);
- }
-
- public static void generateParameters(
- SpectraContainer container,
- SpecDataType dataType,
- AminoAcidSet aaSet,
- File outputDir,
- boolean isText,
- boolean verbose,
- boolean singlePartition) {
- if (verbose)
- System.out.println("Number of annotated PSMs: " + container.size());
-
- String paramFileName = dataType.toString() + ".param";
-
- File outputFile;
- if (outputDir != null)
- outputFile = new File(outputDir, paramFileName);
- else
- outputFile = new File(paramFileName);
-
- if (verbose)
- System.out.println("Output file name: " + outputFile.getAbsolutePath());
- int errorScalingFactor = 0;
- boolean applyDeconvolution = false;
-
- if (dataType.getInstrumentType() == InstrumentType.HIGH_RESOLUTION_LTQ
- || dataType.getInstrumentType() == InstrumentType.TOF
- || dataType.getInstrumentType().isHighResolution()) {
- errorScalingFactor = 100;
- applyDeconvolution = true;
- if (verbose)
- System.out.println("High-precision MS/MS data: " +
- "errorScalingFactor(" + errorScalingFactor + ") " +
- "chargeDeconvolution(" + applyDeconvolution + ")");
- }
-
- boolean considerPhosLoss = false;
- if (dataType.getProtocol().getName().equals("Phosphorylation")) {
- considerPhosLoss = true;
- if (verbose)
- System.out.println("Consider H3PO4 loss.");
- }
-
- boolean consideriTRAQLoss = false;
- if (dataType.getProtocol().getName().equals("iTRAQ")) {
- consideriTRAQLoss = true;
- if (verbose)
- System.out.println("Consider iTRAQ loss.");
- }
-
- boolean considerTMTLoss = false;
- if (dataType.getProtocol().getName().equals("TMT")) {
- considerTMTLoss = true;
- if (verbose)
- System.out.println("Consider TMT loss.");
- }
-
- if (dataType.getProtocol().getName().equals("iTRAQPhospho")) {
- considerPhosLoss = true;
- consideriTRAQLoss = true;
- if (verbose)
- System.out.println("Consider iTRAQ and H3PO4 loss.");
- }
-
- HashSet pepSet = new HashSet();
- for (Spectrum spec : container)
- pepSet.add(spec.getAnnotationStr());
-
- if (verbose)
- System.out.println("Number of unique peptides: " + pepSet.size());
- int numSpecsPerPeptide;
- if (pepSet.size() < 2000) {
- numSpecsPerPeptide = 3;
- } else {
- numSpecsPerPeptide = 1;
- }
- if (verbose)
- System.out.println("Consider " + numSpecsPerPeptide + " per spectrum.");
-
- // multiple spectra with the same peptide -> one spec per peptide
- HashMap> pepSpecMap = new HashMap>();
- for (Spectrum spec : container) {
- if (spec.getAnnotationStr() == null)
- continue;
- String pep = spec.getAnnotationStr() + ":" + spec.getCharge();
- if (pep != null && pep.length() > 0) {
- ArrayList specList = pepSpecMap.get(pep);
- if (specList == null) {
- specList = new ArrayList();
- pepSpecMap.put(pep, specList);
- }
- if (specList.size() < numSpecsPerPeptide)
- specList.add(spec);
- }
- }
-
- SpectraContainer specContOnePerPep = new SpectraContainer();
- for (ArrayList specList : pepSpecMap.values()) {
- for (Spectrum spec : specList) {
- specContOnePerPep.add(spec);
- }
- }
-
- ScoringParameterGeneratorWithErrors gen = new ScoringParameterGeneratorWithErrors(
- specContOnePerPep,
- dataType,
- considerPhosLoss,
- consideriTRAQLoss,
- considerTMTLoss,
- applyDeconvolution);
-
- // set up the tolerance
- gen.tolerance(new Tolerance(0.5f));
-
- // Step 1: partition spectra
- if (singlePartition)
- gen.partition(2, true);
- else
- gen.partition(NUM_SEGMENTS_PER_SPECTRUM, false);
- if (verbose)
- System.out.println("Partition: " + gen.partitionSet.size());
-
- // Step 2: compute offset frequency functions of precursor peaks and their neutral losses
- gen.precursorOFF(MIN_PRECURSOR_OFFSET_PROBABILITY);
- if (verbose)
- System.out.println("PrecursorOFF Done.");
-
- // Step 3: filter out "significant" precursor offsets
- gen.filterPrecursorPeaks();
- if (verbose)
- System.out.println("Filtering Done.");
-
- if (applyDeconvolution) {
- gen.deconvoluteSpectra();
- if (verbose)
- System.out.println("Deconvolution Done.");
- }
-
- // Step 4: compute offset frequency function of fragment peaks and determine ion types to be considered for scoring
- gen.selectIonTypes();
- if (verbose)
- System.out.println("Ion types selected.");
-
- // Step 5: compute rank distributions
- gen.generateRankDist(MAX_RANK);
- if (verbose)
- System.out.println("Rank distribution computed.");
-
- // Step 6 (optional): generate error distribution
- gen.generateErrorDist(errorScalingFactor);
- if (verbose)
- System.out.println("Error disbribution computed");
-
- // Step 7: smoothing parameters
- gen.smoothing();
- if (verbose)
- System.out.println("Smoothing complete.");
-
- // output
-
- gen.writeParameters(outputFile);
- gen.writeParametersPlainText(new File(outputFile.getPath()+".txt"));
- //if (!isText)
- // gen.writeParameters(outputFile);
- //else
- // gen.writeParametersPlainText(outputFile);
-
- if (verbose)
- System.out.println("Writing Done.");
- }
-
- // Required
- private SpectraContainer specContainer;
- private final boolean considerPhosLoss;
- private final boolean consideriTRAQLoss;
- private final boolean considerTMTLoss;
-
- public ScoringParameterGeneratorWithErrors(SpectraContainer specContainer, SpecDataType dataType, boolean considerPhosLoss, boolean consideriTRAQLoss, boolean considerTMTLoss, boolean applyDeconvolution) {
- this.specContainer = specContainer;
- this.considerPhosLoss = considerPhosLoss;
- this.consideriTRAQLoss = consideriTRAQLoss;
- this.considerTMTLoss = considerTMTLoss;
- super.dataType = dataType;
- super.applyDeconvolution = applyDeconvolution;
- super.deconvolutionErrorTolerance = DECONVOLUTION_MASS_TOLERANCE;
- }
-
- public void partition(int numSegments, boolean singlePartition) {
- super.numSegments = numSegments;
- chargeHist = new Histogram();
- partitionSet = new TreeSet();
-
-
- HashMap> parentMassMap = new HashMap>();
- for (Spectrum spec : specContainer) {
- int charge = spec.getCharge();
- if (charge <= 0)
- continue;
- chargeHist.add(charge);
- if (spec.getAnnotation() != null) {
- ArrayList precursorList = parentMassMap.get(charge);
- if (precursorList == null) {
- precursorList = new ArrayList();
- parentMassMap.put(charge, precursorList);
- }
- precursorList.add(spec.getPrecursorMass());
- }
- }
-
- for (int c = chargeHist.minKey(); c <= chargeHist.maxKey(); c++) {
-
- ArrayList parentMassList = parentMassMap.get(c);
- if (parentMassList == null)
- continue;
-
- int numSpec = parentMassList.size();
- if (numSpec < Math.round(MIN_NUM_SPECTRA_PER_PARTITION * 0.9f)) // to few spectra
- continue;
-
- int partitionSize = Math.max(numSpec / MAX_NUM_PARTITIONS_PER_CHARGE, MIN_NUM_SPECTRA_PER_PARTITION);
-
- Collections.sort(parentMassList);
- int bestSetSize = 0;
-
- if (singlePartition)
- bestSetSize = numSpec;
- else {
- int smallestRemainder = partitionSize;
- for (int i = Math.round(partitionSize * 0.9f); i <= Math.round(partitionSize * 1.1f); i++) {
- int remainder = numSpec % i;
- if (i - remainder < remainder)
- remainder = i - remainder;
- if (remainder < smallestRemainder || (remainder == smallestRemainder && Math.abs(partitionSize - i) < Math.abs(partitionSize - bestSetSize))) {
- bestSetSize = i;
- smallestRemainder = remainder;
- }
- }
- }
- int num = 0;
- for (int i = 0; i == 0 || i < Math.round(numSpec / (float) bestSetSize); i++) {
- if (num != 0) {
- for (int seg = 0; seg < numSegments; seg++)
- partitionSet.add(new Partition(c, parentMassList.get(num), seg));
- } else {
- for (int seg = 0; seg < numSegments; seg++)
- partitionSet.add(new Partition(c, 0f, seg));
- }
- num += bestSetSize;
- }
- }
- }
-
- private void precursorOFF(float minProbThreshold) {
- if (chargeHist == null) {
- assert (false) : "partition() must have been called before";
- return;
- }
- precursorOFFMap = new TreeMap>();
- numPrecurOFF = 0;
-
- for (int charge = chargeHist.minKey(); charge <= chargeHist.maxKey(); charge++) {
- if (chargeHist.get(charge) < MIN_NUM_SPECTRA_FOR_PRECURSOR_OFF)
- continue;
- ArrayList precursorOffsetList = new ArrayList();
- int numSpecs = 0;
- HashMap> histList = new HashMap>();
- for (int c = charge; c >= 2; c--)
- histList.put(c, new Histogram());
-
- for (Spectrum spec : specContainer) {
- if (spec.getAnnotation() == null)
- continue;
- if (spec.getCharge() != charge)
- continue;
- numSpecs++;
- spec = filter.apply(spec);
- float precursorNeutralMass = spec.getPrecursorMass();
- for (int c = charge; c >= 2; c--) {
- float precursorMz = (precursorNeutralMass + c * (float) Composition.ChargeCarrierMass()) / c;
- ArrayList peakList = spec.getPeakListByMassRange(
- precursorMz + MIN_PRECURSOR_OFFSET / (float) c - mme.getToleranceAsDa(precursorMz + MIN_PRECURSOR_OFFSET / (float) c) / 2,
- precursorMz + MAX_PRECURSOR_OFFSET / (float) c + mme.getToleranceAsDa(precursorMz + MAX_PRECURSOR_OFFSET / (float) c) / 2);
-
- int prevMassIndexDiff = Integer.MIN_VALUE;
- for (Peak p : peakList) {
- float peakMass = p.getMz();
- int massIndexDiff = NominalMass.toNominalMass(peakMass - precursorMz);
- if (massIndexDiff > prevMassIndexDiff) {
- histList.get(c).add(massIndexDiff);
- prevMassIndexDiff = massIndexDiff;
- }
- }
- }
- }
-
- for (int c = charge; c >= 2; c--) {
- ArrayList