Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ subprojects {
}

dependencies {
implementation group: 'org.slf4j', name: 'slf4j-api', version: '1.7.25'
implementation group: 'org.slf4j', name: 'jcl-over-slf4j', version: '1.7.25'
implementation group: 'org.slf4j', name: 'slf4j-api', version: '1.7.36'
implementation group: 'org.slf4j', name: 'jcl-over-slf4j', version: '1.7.36'
implementation group: 'org.slf4j', name: 'jul-to-slf4j', version: '1.7.36'
implementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.13'
implementation "com.google.code.findbugs:jsr305:3.0.0"
implementation group: 'org.springframework', name: 'spring-context', version: "${springVersion}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import com.google.common.primitives.Bytes;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -29,9 +31,14 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
Expand All @@ -45,6 +52,7 @@
import org.iq80.leveldb.ReadOptions;
import org.iq80.leveldb.WriteBatch;
import org.iq80.leveldb.WriteOptions;
import org.tron.common.es.ExecutorServiceManager;
import org.tron.common.parameter.CommonParameter;
import org.tron.common.storage.WriteOptionsWrapper;
import org.tron.common.storage.metric.DbStat;
Expand All @@ -61,6 +69,13 @@
public class LevelDbDataSourceImpl extends DbStat implements DbSourceInter<byte[]>,
Iterable<Entry<byte[], byte[]>>, Instance<LevelDbDataSourceImpl> {

/** First watchdog WARN fires this many seconds after factory.open() begins. */
private static final long OPEN_WATCHDOG_INITIAL_DELAY_SEC = 60;
/** Subsequent watchdog WARN lines are emitted on this interval. */
private static final long OPEN_WATCHDOG_PERIOD_SEC = 30;
/** Value of {@code Filename.currentFileName()}. */
private static final String LEVELDB_CURRENT_FILE = "CURRENT";

private String dataBaseName;
private DB database;
private volatile boolean alive;
Expand Down Expand Up @@ -121,6 +136,15 @@ private void openDatabase(Options dbOptions) throws IOException {
if (!Files.isSymbolicLink(dbPath.getParent())) {
Files.createDirectories(dbPath.getParent());
}
final long openStartNs = System.nanoTime();
final AtomicReference<String> manifestInfo = new AtomicReference<>();
ScheduledExecutorService watchdog = ExecutorServiceManager
.newSingleThreadScheduledExecutor("db-open-watchdog-" + dataBaseName, true);
ScheduledFuture<?> watchdogTask = watchdog.scheduleAtFixedRate(
() -> logSlowOpen(dbPath, openStartNs, manifestInfo),
OPEN_WATCHDOG_INITIAL_DELAY_SEC,
OPEN_WATCHDOG_PERIOD_SEC,
TimeUnit.SECONDS);
try {
DbSourceInter.checkOrInitEngine(getEngine(), dbPath.toString(),
TronError.ErrCode.LEVELDB_INIT);
Expand All @@ -139,7 +163,55 @@ private void openDatabase(Options dbOptions) throws IOException {
logger.error("Open Database {} failed", dataBaseName, e);
}
throw new TronError(e, TronError.ErrCode.LEVELDB_INIT);
} finally {
watchdogTask.cancel(false);
watchdog.shutdownNow();
}
}

/**
 * Watchdog callback: WARNs that factory.open() is still blocked — usually
 * because the MANIFEST has grown large enough to make recovery replay
 * expensive. Never propagates an exception; any internal failure is
 * downgraded to DEBUG so the watchdog cannot disturb startup.
 *
 * @param dbPath            directory of the database being opened
 * @param startNs           System.nanoTime() captured when open() began
 * @param manifestInfoCache lazily-populated, shared MANIFEST description so
 *                          the disk is probed at most once across ticks
 */
void logSlowOpen(Path dbPath, long startNs, AtomicReference<String> manifestInfoCache) {
  try {
    final long blockedForSec = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - startNs);
    String details = manifestInfoCache.get();
    if (details == null) {
      // Resolve on the first WARN only; later ticks reuse the cached value.
      details = resolveManifestInfo(dbPath.toFile());
      manifestInfoCache.compareAndSet(null, details);
    }
    logger.warn("DB {} open still in progress after {}s. path={}, {}. "
        + "This startup will complete; to speed up future restarts, run "
        + "`java -jar Toolkit.jar db archive -d {}` before the next startup "
        + "to rebuild the MANIFEST (the tool requires an exclusive DB lock, "
        + "so it cannot run while the node is up).",
        dataBaseName, blockedForSec, dbPath, details, parentPath);
  } catch (Exception e) {
    // Purely observational - never let the watchdog disrupt startup.
    logger.debug("db-open-watchdog failure for {}: {}", dataBaseName, e.getMessage());
  }
}

/**
 * Best-effort lookup of the active MANIFEST name and size for {@code dbDir}.
 * Reads the LevelDB CURRENT pointer file and stats the MANIFEST it names.
 *
 * @param dbDir the database directory
 * @return a human-readable summary, e.g. {@code MANIFEST=MANIFEST-000123 (12.34 MB)};
 *         falls back to {@code MANIFEST=none (0.00 MB)} when CURRENT is
 *         missing, empty, or unreadable (expected for a brand-new DB).
 */
private static String resolveManifestInfo(File dbDir) {
  String name = "none";
  long sizeBytes = 0;
  File currentFile = new File(dbDir, LEVELDB_CURRENT_FILE);
  if (currentFile.isFile()) {
    try {
      String pointed = new String(Files.readAllBytes(currentFile.toPath()),
          StandardCharsets.UTF_8).trim();
      // Guard against a blank CURRENT: new File(dbDir, "") would denote the
      // directory itself and produce the malformed "MANIFEST= (0.00 MB)".
      if (!pointed.isEmpty()) {
        name = pointed;
        File manifest = new File(dbDir, name);
        if (manifest.isFile()) {
          sizeBytes = manifest.length();
        }
      }
    } catch (IOException ignored) {
      // Best-effort — keep defaults. A new DB won't hit the 60s threshold
      // anyway, so reporting 0.00 MB here is the expected shape.
    }
  }
  return String.format(Locale.ROOT, "MANIFEST=%s (%.2f MB)", name,
      sizeBytes / 1024.0 / 1024.0);
}

public Path getDbPath() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ protected void statProperty() {
double size = Double.parseDouble(tmp[2]) * 1048576.0;
Metrics.gaugeSet(MetricKeys.Gauge.DB_SST_LEVEL, files, getEngine(), getName(), level);
Metrics.gaugeSet(MetricKeys.Gauge.DB_SIZE_BYTES, size, getEngine(), getName(), level);
logger.info("DB {}, level:{},files:{},size:{} M",
logger.debug("DB {}, level:{},files:{},size:{} M",
getName(), level, files, size / 1048576.0);
});
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import lombok.Getter;
import lombok.Setter;
Expand Down Expand Up @@ -94,6 +95,8 @@ public class TransactionCapsule implements ProtoCapsule<Transaction> {
.newFixedThreadPool(esName, CommonParameter.getInstance()
.getValidContractProtoThreadNum());
private static final String OWNER_ADDRESS = "ownerAddress_";
// Signature verification typically takes 2-6 ms, so 50 ms is a comfortable
// threshold for flagging abnormally slow verification.
private static final long SLOW_SIG_VERIFY_MS = 50;

private Transaction transaction;
@Setter
Expand Down Expand Up @@ -648,6 +651,7 @@ public boolean validatePubSignature(AccountStore accountStore,

byte[] hash = getTransactionId().getBytes();

long startNs = System.nanoTime();
try {
if (!validateSignature(this.transaction, hash, accountStore, dynamicPropertiesStore)) {
isVerified = false;
Expand All @@ -656,12 +660,27 @@ public boolean validatePubSignature(AccountStore accountStore,
} catch (SignatureException | PermissionException | SignatureFormatException e) {
isVerified = false;
throw new ValidateSignatureException(e.getMessage());
} finally {
logSlowSigVerify(startNs);
}
isVerified = true;
}
return true;
}

/**
 * Emits an INFO line when a single signature-verification pass exceeded
 * {@link #SLOW_SIG_VERIFY_MS}. Package-private so tests can invoke it
 * directly instead of forcing a genuinely slow crypto path.
 *
 * @param startNs System.nanoTime() captured just before verification began
 */
void logSlowSigVerify(long startNs) {
  final long elapsedMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNs);
  if (elapsedMs <= SLOW_SIG_VERIFY_MS) {
    return;
  }
  logger.info("slow verify: txId={}, sigCount={}, cost={} ms",
      getTransactionId(), this.transaction.getSignatureCount(), elapsedMs);
}

/**
* validate signature
*/
Expand Down
40 changes: 33 additions & 7 deletions common/src/main/java/org/tron/common/log/LogService.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import ch.qos.logback.classic.LoggerContext;
import ch.qos.logback.classic.joran.JoranConfigurator;
import ch.qos.logback.classic.jul.LevelChangePropagator;
import ch.qos.logback.classic.spi.LoggerContextListener;
import ch.qos.logback.core.util.StatusPrinter;
import java.io.File;
import org.slf4j.LoggerFactory;
Expand All @@ -12,18 +14,42 @@ public class LogService {
public static void load(String path) {
LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
try {
File file = new File(path);
if (!file.exists() || !file.isFile() || !file.canRead()) {
return;
// we must fail fast rather than silently fall back and mislead them into
// thinking their custom config is active.
if (path != null && !path.isEmpty()) {
File file = new File(path);
if (!file.exists() || !file.isFile() || !file.canRead()) {
throw new IllegalArgumentException(
"logback config is not a readable file: " + path);
}
JoranConfigurator configurator = new JoranConfigurator();
configurator.setContext(lc);
lc.reset();
configurator.doConfigure(file);
}
JoranConfigurator configurator = new JoranConfigurator();
configurator.setContext(lc);
lc.reset();
configurator.doConfigure(file);
// Whether we loaded a custom config via --log-config or kept the classpath
// default, make sure Logback level changes are propagated back to JUL so
// gRPC loggers actually honor the levels declared in the XML. If
// the active config already registered a LevelChangePropagator we leave
// it alone.
ensureLevelChangePropagator(lc);
} catch (Exception e) {
throw new TronError(e, TronError.ErrCode.LOG_LOAD);
} finally {
StatusPrinter.printInCaseOfErrorsOrWarnings(lc);
}
}

/**
 * Registers a started {@code LevelChangePropagator} (with resetJUL=true) on
 * the context unless the active configuration already installed one.
 *
 * @param lc the logger context to inspect and, if needed, extend
 */
private static void ensureLevelChangePropagator(LoggerContext lc) {
  boolean alreadyRegistered = lc.getCopyOfListenerList().stream()
      .anyMatch(listener -> listener instanceof LevelChangePropagator);
  if (alreadyRegistered) {
    return;
  }
  LevelChangePropagator julPropagator = new LevelChangePropagator();
  julPropagator.setContext(lc);
  julPropagator.setResetJUL(true);
  julPropagator.start();
  lc.addListener(julPropagator);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Set;
import lombok.Getter;
import lombok.Setter;
import org.slf4j.bridge.SLF4JBridgeHandler;
import org.tron.common.args.GenesisBlock;
import org.tron.common.config.DbBackupConfig;
import org.tron.common.cron.CronExpression;
Expand All @@ -23,6 +24,19 @@

public class CommonParameter {

// Install the JUL->SLF4J bridge early so that JUL log records emitted during
// static init of grpc classes (or from unit tests that don't invoke
// LogService.load()) still reach Logback.
static {
  // Check first: the original unconditional removeHandlersForRootLogger()
  // would strip an already-installed bridge handler before the
  // isInstalled() test ran, making that guard always pass and needlessly
  // removing/re-adding the bridge. Guarding up front makes the install
  // idempotent.
  if (!SLF4JBridgeHandler.isInstalled()) {
    // Strip JUL's default ConsoleHandler so each record is emitted once
    // (via the bridge to Logback) instead of twice.
    SLF4JBridgeHandler.removeHandlersForRootLogger();
    SLF4JBridgeHandler.install();
  }
}

protected static CommonParameter PARAMETER = new CommonParameter();

// Runtime chain state: set by VMConfig.initVmHardFork()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import ch.qos.logback.core.hook.ShutdownHookBase;
import ch.qos.logback.core.util.Duration;
import org.tron.program.FullNode;

/**
* @author kiven
Expand All @@ -16,11 +15,24 @@ public class TronLogShutdownHook extends ShutdownHookBase {
private static final Duration CHECK_SHUTDOWN_DELAY = Duration.buildByMilliseconds(100);

/**
* The check times before shutdown. default is 60000/100 = 600 times.
* Maximum time to wait for a graceful application shutdown before forcing
* a log flush. Each pool managed by
* ExecutorServiceManager.shutdownAndAwaitTermination() can take up to
* 120 s in the worst case (60 s await +
* shutdownNow + 60 s await). 180 s is therefore not a hard upper bound, but
* a pragmatic headroom that assumes the many pools in the node shut down
* largely in parallel; in pathological cases trailing shutdown logs may
* still be truncated. In practice 180 s of shutdown output is also enough
* to diagnose most stalls — if a pool is still alive past that window the
* earlier logs already carry the stack/trace context needed to locate the
* offender, so truncating the tail is an acceptable trade-off against
* holding JVM exit open indefinitely.
*/
private final long check_times = 60 * 1000 / CHECK_SHUTDOWN_DELAY.getMilliseconds();
private static final long MAX_WAIT_MS = 3 * 60 * 1000;

// if true, shutdown hook will be executed , for example, 'java -jar FullNode.jar -[v|h]'.
private static final long CHECK_TIMES =
MAX_WAIT_MS / CHECK_SHUTDOWN_DELAY.getMilliseconds();

// if true, shutdown hook will be executed, for example, 'java -jar FullNode.jar -[v|h]'.
public static volatile boolean shutDown = true;

public TronLogShutdownHook() {
Expand All @@ -29,16 +41,19 @@ public TronLogShutdownHook() {
@Override
public void run() {
try {
for (int i = 0; i < check_times; i++) {
for (long i = 0; i < CHECK_TIMES; i++) {
if (shutDown) {
break;
}
addInfo("Sleeping for " + CHECK_SHUTDOWN_DELAY);
if (i % 100 == 0) {
long elapsedSeconds = i * CHECK_SHUTDOWN_DELAY.getMilliseconds() / 1000;
addInfo("Waiting for application shutdown... elapsed=" + elapsedSeconds + "s");
}
Thread.sleep(CHECK_SHUTDOWN_DELAY.getMilliseconds());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
addInfo("TronLogShutdownHook run error :" + e.getMessage());
addInfo("TronLogShutdownHook interrupted: " + e.getMessage());
}
super.stop();
}
Expand Down
20 changes: 13 additions & 7 deletions framework/src/main/resources/logback.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

<configuration>

<!-- Propagate logback level changes back to JUL, eliminating the 60x overhead
for disabled log statements when using the jul-to-slf4j bridge.
resetJUL=true clears any explicit JUL logger levels so that the levels
declared below are the effective ones. -->
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
<resetJUL>true</resetJUL>
</contextListener>

<!-- Be sure to flush latest logs on exit -->
<shutdownHook class="org.tron.core.config.TronLogShutdownHook"/>
<conversionRule conversionWord="m"
Expand Down Expand Up @@ -61,19 +69,17 @@
<!-- (the smaller value -> flush occurs often) -->
<queueSize>100</queueSize>
<includeCallerData>true</includeCallerData>
<!-- Allow up to 5 s to drain the queue on shutdown before giving up -->
<maxFlushTime>5000</maxFlushTime>
<appender-ref ref="FILE"/>
</appender>

<root level="INFO">
<!--<appender-ref ref="CONSOLE"/>-->
<appender-ref ref="ASYNC"/>
</root>


<appender name="METRICS" class="org.tron.common.prometheus.InstrumentedAppender" />

<root level="INFO">
<appender-ref ref="METRICS" />
<!--<appender-ref ref="CONSOLE"/>-->
<appender-ref ref="ASYNC"/>
<appender-ref ref="METRICS"/>
</root>

<logger level="INFO" name="app"/>
Expand Down
Loading
Loading