BV compress step crashes with ArrayIndexOutOfBoundsException
This happens repeatably when running on the new dataset. I added some logs:
/*
* Copyright (c) 2022-2023 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
package org.softwareheritage.graph.compress;
import java.io.File;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.IntStream;
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.ImmutableSequentialGraph;
import it.unimi.dsi.big.webgraph.NodeIterator;
import it.unimi.dsi.big.webgraph.Transform;
import it.unimi.dsi.fastutil.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.AllowedNodes;
public class ScatteredArcsORCGraph extends ImmutableSequentialGraph {
private static final Logger LOGGER = LoggerFactory.getLogger(ScatteredArcsORCGraph.class);
/** The default number of threads. */
public static final int DEFAULT_NUM_THREADS = Runtime.getRuntime().availableProcessors();
/** The default batch size. */
public static final int DEFAULT_BATCH_SIZE = Math
.min((int) (Runtime.getRuntime().maxMemory() * 0.4 / (DEFAULT_NUM_THREADS * 8 * 2)), Arrays.MAX_ARRAY_SIZE);
/** The batch graph used to return node iterators. */
private final Transform.BatchGraph batchGraph;
/**
* Creates a scattered-arcs ORC graph.
*
* @param dataset the Swh ORC Graph dataset
* @param function an explicitly provided function from string representing nodes to node numbers,
* or <code>null</code> for the standard behaviour.
* @param n the number of nodes of the graph (used only if <code>function</code> is not
* <code>null</code>).
* @param numThreads the number of threads to use.
* @param batchSize the number of integers in a batch; two arrays of integers of this size will be
* allocated by each thread.
* @param tempDir a temporary directory for the batches, or <code>null</code> for
* {@link File#createTempFile(java.lang.String, java.lang.String)}'s choice.
* @param pl a progress logger, or <code>null</code>.
*/
public ScatteredArcsORCGraph(final ORCGraphDataset dataset, final Object2LongFunction<byte[]> function,
final long n, final int numThreads, final int batchSize, final File tempDir, final ProgressLogger pl)
throws IOException {
final ObjectArrayList<File> batches = new ObjectArrayList<>();
ForkJoinPool forkJoinPool = new ForkJoinPool(numThreads);
long[][] srcArrays = new long[numThreads][batchSize];
long[][] dstArrays = new long[numThreads][batchSize];
int[] indexes = new int[numThreads];
long[] progressCounts = new long[numThreads];
AtomicInteger pairs = new AtomicInteger(0);
AtomicInteger nextThreadId = new AtomicInteger(0);
ThreadLocal<Integer> threadLocalId = ThreadLocal.withInitial(nextThreadId::getAndIncrement);
if (pl != null) {
pl.itemsName = "arcs";
pl.start("Creating sorted batches...");
}
try {
forkJoinPool.submit(() -> {
try {
dataset.readEdges((node) -> {
}, (src, dst, label, perms) -> {
long s = function.getLong(src);
long t = function.getLong(dst);
int threadId = threadLocalId.get();
int idx = indexes[threadId]++;
if (idx <= 2) {
LOGGER.info(String.format("Thread %d (%d) writing at idx = %d", threadId, Thread.currentThread().getId(), idx));
}
if (idx >= batchSize - 3) {
LOGGER.info(String.format("Thread %d (%d) writing at idx = %d", threadId,
Thread.currentThread().getId(), idx));
}
if (batchSize != 45639270) {
LOGGER.info(String.format("Thread %d (%d) has batchSize = %d and idx = %d", threadId,
Thread.currentThread().getId(), batchSize, idx));
}
srcArrays[threadId][idx] = s;
dstArrays[threadId][idx] = t;
if (idx == batchSize - 1) {
LOGGER.info(String.format("Thread %d (%d) processing batch...", threadId,
Thread.currentThread().getId()));
pairs.addAndGet(Transform.processBatch(batchSize, srcArrays[threadId], dstArrays[threadId],
tempDir, batches));
indexes[threadId] = 0;
LOGGER.info(String.format("Thread %d (%d) done processing batch.", threadId,
Thread.currentThread().getId()));
}
if (pl != null && ++progressCounts[threadId] > 1000) {
synchronized (pl) {
pl.update(progressCounts[threadId]);
}
progressCounts[threadId] = 0;
}
});
} catch (IOException e) {
int threadId = threadLocalId.get();
LOGGER.error(String.format("Exception 1 in thread %d (%d): %s", threadId,
Thread.currentThread().getId(), e));
e.printStackTrace(System.out);
throw new RuntimeException(e);
} catch (Exception e) {
int threadId = threadLocalId.get();
e.printStackTrace(System.out);
LOGGER.error(String.format("Exception 2 in thread %d (%d): %s", threadId,
Thread.currentThread().getId(), e));
throw e;
}
}).get();
} catch (InterruptedException | ExecutionException e) {
LOGGER.error(String.format("Exception 3: %s", e));
e.printStackTrace(System.out);
throw new RuntimeException(e);
} catch (Exception e) {
LOGGER.error(String.format("Exception 4: %s", e));
e.printStackTrace(System.out);
throw e;
}
IntStream.range(0, numThreads).parallel().forEach(t -> {
int idx = indexes[t];
if (idx > 0) {
try {
pairs.addAndGet(Transform.processBatch(idx, srcArrays[t], dstArrays[t], tempDir, batches));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
// Trigger the GC to free up the large arrays
for (int i = 0; i < numThreads; i++) {
srcArrays[i] = null;
dstArrays[i] = null;
}
if (pl != null) {
pl.done();
pl.logger().info("Created " + batches.size() + " batches.");
}
batchGraph = new Transform.BatchGraph(n, pairs.get(), batches);
}
// ...
}
which produces about 3/4 through the process:
$ grep -i "thread 44" /srv/softwareheritage/ssd/datasets/2023-08-07/compressed/logs/graph-1692346062163-bv.log | tail -n 20
2023-08-18 16:07:30,778 28788208 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639267
2023-08-18 16:07:30,778 28788208 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639268
2023-08-18 16:07:30,778 28788208 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639269
2023-08-18 16:07:30,778 28788208 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) processing batch...
2023-08-18 16:08:21,741 28839171 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) done processing batch.
2023-08-18 16:08:21,741 28839171 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 0
2023-08-18 16:08:21,741 28839171 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 1
2023-08-18 16:08:21,741 28839171 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 2
2023-08-18 20:07:38,413 43195843 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639267
2023-08-18 20:07:38,414 43195844 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639268
2023-08-18 20:07:38,414 43195844 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639269
2023-08-18 20:07:38,414 43195844 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) processing batch...
2023-08-18 20:08:01,266 43218696 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639270
2023-08-18 20:08:01,496 43218926 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639271
2023-08-18 20:08:02,028 43219458 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 45639272
2023-08-18 20:08:43,723 43261153 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) done processing batch.
2023-08-18 20:08:43,723 43261153 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 0
2023-08-18 20:08:43,724 43261154 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 1
2023-08-18 20:08:43,724 43261154 INFO [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Thread 44 (28) writing at idx = 2
2023-08-18 20:08:48,648 43266078 ERROR [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Exception 2 in thread 44 (28): java.lang.ArrayIndexOutOfBoundsException: Index 45639270 out of bounds for length 45639270
moving indexes[threadId] = 0;
before the call to Transform.processBatch
in order to avoid likelihood of such conflicts, but actually increases the number of such conflicts:
2023-08-19 04:05:07,536 27158461 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) writing at idx = 45639267
2023-08-19 04:05:07,536 27158461 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) writing at idx = 45639268
2023-08-19 04:05:07,536 27158461 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) writing at idx = 45639269
2023-08-19 04:05:07,536 27158461 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) processing batch...
2023-08-19 04:05:07,909 27158834 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) writing at idx = 45639267
2023-08-19 04:05:07,921 27158846 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) writing at idx = 45639268
2023-08-19 04:05:07,922 27158847 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) writing at idx = 45639269
2023-08-19 04:05:07,922 27158847 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) processing batch...
2023-08-19 04:05:08,010 27158935 INFO [ForkJoinPool-1-worker-31] o.s.g.c.ScatteredArcsORCGraph - Thread 80 (58) done processing batch.
2023-08-19 04:05:08,010 27158935 INFO [ForkJoinPool-1-worker-31] o.s.g.c.ScatteredArcsORCGraph - Thread 80 (58) writing at idx = 0
2023-08-19 04:05:08,010 27158935 INFO [ForkJoinPool-1-worker-31] o.s.g.c.ScatteredArcsORCGraph - Thread 80 (58) writing at idx = 1
2023-08-19 04:05:08,010 27158935 INFO [ForkJoinPool-1-worker-31] o.s.g.c.ScatteredArcsORCGraph - Thread 80 (58) writing at idx = 2
2023-08-19 04:05:08,208 27159133 INFO [ForkJoinPool-1-worker-77] o.s.g.c.ScatteredArcsORCGraph - Thread 5 (106) done processing batch.
2023-08-19 04:05:08,208 27159133 INFO [ForkJoinPool-1-worker-77] o.s.g.c.ScatteredArcsORCGraph - Thread 5 (106) writing at idx = 0
2023-08-19 04:05:08,208 27159133 INFO [ForkJoinPool-1-worker-77] o.s.g.c.ScatteredArcsORCGraph - Thread 5 (106) writing at idx = 1
2023-08-19 04:05:08,208 27159133 INFO [ForkJoinPool-1-worker-77] o.s.g.c.ScatteredArcsORCGraph - Thread 5 (106) writing at idx = 2
2023-08-19 04:05:08,219 27159144 INFO [ForkJoinPool-1-worker-83] o.s.g.c.ScatteredArcsORCGraph - Thread 9 (112) done processing batch.
2023-08-19 04:05:08,233 27159158 INFO [ForkJoinPool-1-worker-83] o.s.g.c.ScatteredArcsORCGraph - Thread 9 (112) writing at idx = 0
2023-08-19 04:05:08,233 27159158 INFO [ForkJoinPool-1-worker-83] o.s.g.c.ScatteredArcsORCGraph - Thread 9 (112) writing at idx = 1
2023-08-19 04:05:08,234 27159159 INFO [ForkJoinPool-1-worker-83] o.s.g.c.ScatteredArcsORCGraph - Thread 9 (112) writing at idx = 2
2023-08-19 04:05:09,623 27160548 INFO [ForkJoinPool-1-worker-75] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-de5ce8c8-af38-42cf-b862-665c8699c6fb.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:09,623 27160548 INFO [ForkJoinPool-1-worker-75] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:10,398 27161323 INFO [ForkJoinPool-1-worker-75] o.s.g.c.ScatteredArcsORCGraph - Thread 78 (104) writing at idx = 45639270
2023-08-19 04:05:10,506 27161431 INFO [ForkJoinPool-1-worker-75] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-bf233d1f-3361-4b15-b223-ede42e761974.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:10,508 27161433 INFO [ForkJoinPool-1-worker-75] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:10,718 27161643 INFO [ForkJoinPool-1-worker-75] o.s.g.c.ScatteredArcsORCGraph - Thread 78 (104) writing at idx = 45639271
2023-08-19 04:05:10,935 27161860 INFO [ForkJoinPool-1-worker-5] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-5d6c5bbd-4a8f-4c84-9faf-fa9f62d48f98.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:10,935 27161860 INFO [ForkJoinPool-1-worker-5] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:10,943 27161868 INFO [ForkJoinPool-1-worker-10] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-e64c516a-30bf-439c-b976-f7ba5210dd6b.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:10,943 27161868 INFO [ForkJoinPool-1-worker-10] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:12,739 27163664 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) writing at idx = 45639267
2023-08-19 04:05:12,739 27163664 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) writing at idx = 45639268
2023-08-19 04:05:12,739 27163664 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) writing at idx = 45639269
2023-08-19 04:05:12,739 27163664 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) processing batch...
2023-08-19 04:05:12,990 27163915 INFO [ForkJoinPool-1-worker-37] o.s.g.c.ScatteredArcsORCGraph - 368,287,625,706 arcs, 7h 31m 14s, 13,602,908.19 arcs/s, 73.51 ns/arc
2023-08-19 04:05:14,272 27165197 INFO [ForkJoinPool-1-worker-9] o.s.g.c.ScatteredArcsORCGraph - Thread 47 (36) done processing batch.
2023-08-19 04:05:14,272 27165197 INFO [ForkJoinPool-1-worker-9] o.s.g.c.ScatteredArcsORCGraph - Thread 47 (36) writing at idx = 0
2023-08-19 04:05:14,273 27165198 INFO [ForkJoinPool-1-worker-9] o.s.g.c.ScatteredArcsORCGraph - Thread 47 (36) writing at idx = 1
2023-08-19 04:05:14,273 27165198 INFO [ForkJoinPool-1-worker-9] o.s.g.c.ScatteredArcsORCGraph - Thread 47 (36) writing at idx = 2
2023-08-19 04:05:16,387 27167312 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) writing at idx = 45639267
2023-08-19 04:05:16,387 27167312 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) writing at idx = 45639268
2023-08-19 04:05:16,387 27167312 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) writing at idx = 45639269
2023-08-19 04:05:16,387 27167312 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) processing batch...
2023-08-19 04:05:16,908 27167833 INFO [ForkJoinPool-1-worker-75] o.s.g.c.ScatteredArcsORCGraph - Thread 78 (104) done processing batch.
2023-08-19 04:05:16,917 27167842 INFO [ForkJoinPool-1-worker-75] o.s.g.c.ScatteredArcsORCGraph - Thread 78 (104) writing at idx = 0
2023-08-19 04:05:16,917 27167842 INFO [ForkJoinPool-1-worker-75] o.s.g.c.ScatteredArcsORCGraph - Thread 78 (104) writing at idx = 1
2023-08-19 04:05:16,918 27167843 INFO [ForkJoinPool-1-worker-75] o.s.g.c.ScatteredArcsORCGraph - Thread 78 (104) writing at idx = 2
2023-08-19 04:05:19,937 27170862 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) writing at idx = 45639267
2023-08-19 04:05:19,938 27170863 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) writing at idx = 45639268
2023-08-19 04:05:19,938 27170863 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) writing at idx = 45639269
2023-08-19 04:05:19,938 27170863 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) processing batch...
2023-08-19 04:05:21,107 27172032 INFO [ForkJoinPool-1-worker-12] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-be382e44-a5c1-4f72-89a6-cc6124fa210b.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:21,107 27172032 INFO [ForkJoinPool-1-worker-12] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:22,975 27173900 INFO [ForkJoinPool-1-worker-2] o.s.g.c.ScatteredArcsORCGraph - Thread 57 (29) done processing batch.
2023-08-19 04:05:22,989 27173914 INFO [ForkJoinPool-1-worker-2] o.s.g.c.ScatteredArcsORCGraph - Thread 57 (29) writing at idx = 0
2023-08-19 04:05:22,989 27173914 INFO [ForkJoinPool-1-worker-2] o.s.g.c.ScatteredArcsORCGraph - Thread 57 (29) writing at idx = 1
2023-08-19 04:05:22,989 27173914 INFO [ForkJoinPool-1-worker-2] o.s.g.c.ScatteredArcsORCGraph - Thread 57 (29) writing at idx = 2
2023-08-19 04:05:23,005 27173930 INFO [ForkJoinPool-1-worker-85] o.s.g.c.ScatteredArcsORCGraph - 368,434,759,693 arcs, 7h 31m 24s, 13,603,318.21 arcs/s, 73.51 ns/arc
2023-08-19 04:05:24,351 27175276 INFO [ForkJoinPool-1-worker-92] o.s.g.c.ScatteredArcsORCGraph - Thread 51 (121) done processing batch.
2023-08-19 04:05:24,353 27175278 INFO [ForkJoinPool-1-worker-92] o.s.g.c.ScatteredArcsORCGraph - Thread 51 (121) writing at idx = 0
2023-08-19 04:05:24,353 27175278 INFO [ForkJoinPool-1-worker-92] o.s.g.c.ScatteredArcsORCGraph - Thread 51 (121) writing at idx = 1
2023-08-19 04:05:24,353 27175278 INFO [ForkJoinPool-1-worker-92] o.s.g.c.ScatteredArcsORCGraph - Thread 51 (121) writing at idx = 2
2023-08-19 04:05:26,633 27177558 INFO [ForkJoinPool-1-worker-6] o.s.g.c.ScatteredArcsORCGraph - Thread 76 (32) done processing batch.
2023-08-19 04:05:26,633 27177558 INFO [ForkJoinPool-1-worker-6] o.s.g.c.ScatteredArcsORCGraph - Thread 76 (32) writing at idx = 0
2023-08-19 04:05:26,633 27177558 INFO [ForkJoinPool-1-worker-6] o.s.g.c.ScatteredArcsORCGraph - Thread 76 (32) writing at idx = 1
2023-08-19 04:05:26,633 27177558 INFO [ForkJoinPool-1-worker-6] o.s.g.c.ScatteredArcsORCGraph - Thread 76 (32) writing at idx = 2
2023-08-19 04:05:29,012 27179937 INFO [ForkJoinPool-1-worker-70] o.s.g.c.ScatteredArcsORCGraph - Thread 43 (98) done processing batch.
2023-08-19 04:05:29,012 27179937 INFO [ForkJoinPool-1-worker-70] o.s.g.c.ScatteredArcsORCGraph - Thread 43 (98) writing at idx = 0
2023-08-19 04:05:29,012 27179937 INFO [ForkJoinPool-1-worker-70] o.s.g.c.ScatteredArcsORCGraph - Thread 43 (98) writing at idx = 1
2023-08-19 04:05:29,012 27179937 INFO [ForkJoinPool-1-worker-70] o.s.g.c.ScatteredArcsORCGraph - Thread 43 (98) writing at idx = 2
2023-08-19 04:05:29,531 27180456 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) writing at idx = 45639267
2023-08-19 04:05:29,545 27180470 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) writing at idx = 45639268
2023-08-19 04:05:29,545 27180470 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) writing at idx = 45639269
2023-08-19 04:05:29,546 27180471 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) processing batch...
2023-08-19 04:05:31,356 27182281 INFO [ForkJoinPool-1-worker-96] o.s.g.c.ScatteredArcsORCGraph - Thread 94 (125) writing at idx = 45639267
2023-08-19 04:05:31,356 27182281 INFO [ForkJoinPool-1-worker-96] o.s.g.c.ScatteredArcsORCGraph - Thread 94 (125) writing at idx = 45639268
2023-08-19 04:05:31,356 27182281 INFO [ForkJoinPool-1-worker-96] o.s.g.c.ScatteredArcsORCGraph - Thread 94 (125) writing at idx = 45639269
2023-08-19 04:05:31,356 27182281 INFO [ForkJoinPool-1-worker-96] o.s.g.c.ScatteredArcsORCGraph - Thread 94 (125) processing batch...
2023-08-19 04:05:32,309 27183234 INFO [ForkJoinPool-1-worker-87] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-ad9cbea8-049a-4250-a734-55d78713c477.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:32,309 27183234 INFO [ForkJoinPool-1-worker-87] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:32,696 27183621 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) writing at idx = 45639267
2023-08-19 04:05:32,709 27183634 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) writing at idx = 45639268
2023-08-19 04:05:32,709 27183634 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) writing at idx = 45639269
2023-08-19 04:05:32,709 27183634 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) processing batch...
2023-08-19 04:05:32,990 27183915 INFO [ForkJoinPool-1-worker-45] o.s.g.c.ScatteredArcsORCGraph - 368,589,992,771 arcs, 7h 31m 34s, 13,604,026.84 arcs/s, 73.51 ns/arc
2023-08-19 04:05:33,781 27184706 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) done processing batch.
2023-08-19 04:05:33,782 27184707 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) writing at idx = 0
2023-08-19 04:05:33,782 27184707 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) writing at idx = 1
2023-08-19 04:05:33,782 27184707 INFO [ForkJoinPool-1-worker-61] o.s.g.c.ScatteredArcsORCGraph - Thread 17 (89) writing at idx = 2
2023-08-19 04:05:35,000 27185925 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) done processing batch.
2023-08-19 04:05:35,000 27185925 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) writing at idx = 0
2023-08-19 04:05:35,000 27185925 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) writing at idx = 1
2023-08-19 04:05:35,000 27185925 INFO [ForkJoinPool-1-worker-82] o.s.g.c.ScatteredArcsORCGraph - Thread 22 (111) writing at idx = 2
2023-08-19 04:05:35,011 27185936 INFO [ForkJoinPool-1-worker-4] o.s.g.c.ScatteredArcsORCGraph - Thread 87 (31) writing at idx = 45639267
2023-08-19 04:05:35,037 27185962 INFO [ForkJoinPool-1-worker-4] o.s.g.c.ScatteredArcsORCGraph - Thread 87 (31) writing at idx = 45639268
2023-08-19 04:05:35,041 27185966 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) writing at idx = 45639267
2023-08-19 04:05:35,057 27185982 INFO [ForkJoinPool-1-worker-4] o.s.g.c.ScatteredArcsORCGraph - Thread 87 (31) writing at idx = 45639269
2023-08-19 04:05:35,057 27185982 INFO [ForkJoinPool-1-worker-4] o.s.g.c.ScatteredArcsORCGraph - Thread 87 (31) processing batch...
2023-08-19 04:05:35,057 27185982 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) writing at idx = 45639268
2023-08-19 04:05:35,057 27185982 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) writing at idx = 45639269
2023-08-19 04:05:35,057 27185982 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) processing batch...
2023-08-19 04:05:36,550 27187475 INFO [ForkJoinPool-1-worker-21] o.s.g.c.ScatteredArcsORCGraph - Thread 54 (48) writing at idx = 45639267
2023-08-19 04:05:36,573 27187498 INFO [ForkJoinPool-1-worker-21] o.s.g.c.ScatteredArcsORCGraph - Thread 54 (48) writing at idx = 45639268
2023-08-19 04:05:36,573 27187498 INFO [ForkJoinPool-1-worker-21] o.s.g.c.ScatteredArcsORCGraph - Thread 54 (48) writing at idx = 45639269
2023-08-19 04:05:36,574 27187499 INFO [ForkJoinPool-1-worker-21] o.s.g.c.ScatteredArcsORCGraph - Thread 54 (48) processing batch...
2023-08-19 04:05:37,808 27188733 INFO [ForkJoinPool-1-worker-74] o.s.g.c.ScatteredArcsORCGraph - Thread 86 (103) writing at idx = 45639267
2023-08-19 04:05:37,809 27188734 INFO [ForkJoinPool-1-worker-74] o.s.g.c.ScatteredArcsORCGraph - Thread 86 (103) writing at idx = 45639268
2023-08-19 04:05:37,809 27188734 INFO [ForkJoinPool-1-worker-74] o.s.g.c.ScatteredArcsORCGraph - Thread 86 (103) writing at idx = 45639269
2023-08-19 04:05:37,809 27188734 INFO [ForkJoinPool-1-worker-74] o.s.g.c.ScatteredArcsORCGraph - Thread 86 (103) processing batch...
2023-08-19 04:05:38,475 27189400 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) done processing batch.
2023-08-19 04:05:38,475 27189400 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) writing at idx = 0
2023-08-19 04:05:38,475 27189400 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) writing at idx = 1
2023-08-19 04:05:38,475 27189400 INFO [ForkJoinPool-1-worker-71] o.s.g.c.ScatteredArcsORCGraph - Thread 12 (99) writing at idx = 2
2023-08-19 04:05:38,492 27189417 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) writing at idx = 45639267
2023-08-19 04:05:38,493 27189418 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) writing at idx = 45639268
2023-08-19 04:05:38,493 27189418 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) writing at idx = 45639269
2023-08-19 04:05:38,493 27189418 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) processing batch...
2023-08-19 04:05:38,503 27189428 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) done processing batch.
2023-08-19 04:05:38,503 27189428 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) writing at idx = 0
2023-08-19 04:05:38,503 27189428 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) writing at idx = 1
2023-08-19 04:05:38,503 27189428 INFO [ForkJoinPool-1-worker-66] o.s.g.c.ScatteredArcsORCGraph - Thread 19 (94) writing at idx = 2
2023-08-19 04:05:41,144 27192069 INFO [ForkJoinPool-1-worker-10] o.s.g.c.ScatteredArcsORCGraph - Thread 49 (37) writing at idx = 45639267
2023-08-19 04:05:41,144 27192069 INFO [ForkJoinPool-1-worker-10] o.s.g.c.ScatteredArcsORCGraph - Thread 49 (37) writing at idx = 45639268
2023-08-19 04:05:41,144 27192069 INFO [ForkJoinPool-1-worker-10] o.s.g.c.ScatteredArcsORCGraph - Thread 49 (37) writing at idx = 45639269
2023-08-19 04:05:41,144 27192069 INFO [ForkJoinPool-1-worker-10] o.s.g.c.ScatteredArcsORCGraph - Thread 49 (37) processing batch...
2023-08-19 04:05:42,990 27193915 INFO [ForkJoinPool-1-worker-55] o.s.g.c.ScatteredArcsORCGraph - 368,734,908,542 arcs, 7h 31m 44s, 13,604,354.30 arcs/s, 73.51 ns/arc
2023-08-19 04:05:44,685 27195610 INFO [ForkJoinPool-1-worker-87] o.s.g.c.ScatteredArcsORCGraph - Thread 92 (116) writing at idx = 45639267
2023-08-19 04:05:44,685 27195610 INFO [ForkJoinPool-1-worker-87] o.s.g.c.ScatteredArcsORCGraph - Thread 92 (116) writing at idx = 45639268
2023-08-19 04:05:44,685 27195610 INFO [ForkJoinPool-1-worker-87] o.s.g.c.ScatteredArcsORCGraph - Thread 92 (116) writing at idx = 45639269
2023-08-19 04:05:44,685 27195610 INFO [ForkJoinPool-1-worker-87] o.s.g.c.ScatteredArcsORCGraph - Thread 92 (116) processing batch...
2023-08-19 04:05:44,963 27195888 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) done processing batch.
2023-08-19 04:05:44,977 27195902 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) writing at idx = 0
2023-08-19 04:05:44,977 27195902 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) writing at idx = 1
2023-08-19 04:05:44,978 27195903 INFO [ForkJoinPool-1-worker-58] o.s.g.c.ScatteredArcsORCGraph - Thread 23 (86) writing at idx = 2
2023-08-19 04:05:52,990 27203915 INFO [ForkJoinPool-1-worker-55] o.s.g.c.ScatteredArcsORCGraph - 368,866,225,728 arcs, 7h 31m 54s, 13,604,179.99 arcs/s, 73.51 ns/arc
2023-08-19 04:05:53,039 27203964 INFO [ForkJoinPool-1-worker-19] o.s.g.c.ScatteredArcsORCGraph - Thread 63 (46) writing at idx = 45639267
2023-08-19 04:05:53,039 27203964 INFO [ForkJoinPool-1-worker-19] o.s.g.c.ScatteredArcsORCGraph - Thread 63 (46) writing at idx = 45639268
2023-08-19 04:05:53,039 27203964 INFO [ForkJoinPool-1-worker-19] o.s.g.c.ScatteredArcsORCGraph - Thread 63 (46) writing at idx = 45639269
2023-08-19 04:05:53,039 27203964 INFO [ForkJoinPool-1-worker-19] o.s.g.c.ScatteredArcsORCGraph - Thread 63 (46) processing batch...
2023-08-19 04:05:56,967 27207892 INFO [ForkJoinPool-1-worker-42] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-0eceb7b6-7904-4c8c-8cea-45e28943c733.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:56,967 27207892 INFO [ForkJoinPool-1-worker-42] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
java.lang.ArrayIndexOutOfBoundsException
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:564)
at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:591)
at java.base/java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:689)
at java.base/java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:159)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateParallel(ForEachOps.java:173)
at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
at java.base/java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:596)
at java.base/java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:765)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcTable(ORCGraphDataset.java:259)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.readEdges(ORCGraphDataset.java:481)
at org.softwareheritage.graph.compress.ORCGraphDataset.readEdges(ORCGraphDataset.java:183)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$2(ScatteredArcsORCGraph.java:93)
at java.base/java.util.concurrent.ForkJoinTask$AdaptedRunnableAction.exec(ForkJoinTask.java:1375)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1182)
at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1655)
at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1622)
at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:165)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 45639270 out of bounds for length 45639270
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$1(ScatteredArcsORCGraph.java:113)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.lambda$readEdges$0(ORCGraphDataset.java:505)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcFile(ORCGraphDataset.java:283)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.lambda$readOrcTable$0(ORCGraphDataset.java:261)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625)
at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinPool.helpJoin(ForkJoinPool.java:1883)
at java.base/java.util.concurrent.ForkJoinTask.awaitDone(ForkJoinTask.java:440)
at java.base/java.util.concurrent.ForkJoinTask.invokeAll(ForkJoinTask.java:721)
at it.unimi.dsi.fastutil.longs.LongArrays$ForkJoinQuickSort2.compute(LongArrays.java:1305)
at java.base/java.util.concurrent.RecursiveAction.exec(RecursiveAction.java:194)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinTask.awaitDone(ForkJoinTask.java:436)
at java.base/java.util.concurrent.ForkJoinTask.joinForPoolInvoke(ForkJoinTask.java:1040)
at java.base/java.util.concurrent.ForkJoinPool.invoke(ForkJoinPool.java:2639)
at it.unimi.dsi.fastutil.longs.LongArrays.parallelQuickSort(LongArrays.java:1335)
at it.unimi.dsi.big.webgraph.Transform.processBatch(Transform.java:880)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$1(ScatteredArcsORCGraph.java:119)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.lambda$readEdges$0(ORCGraphDataset.java:505)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcFile(ORCGraphDataset.java:283)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.lambda$readOrcTable$0(ORCGraphDataset.java:261)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625)
at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
... 5 more
2023-08-19 04:05:58,439 27209364 ERROR [ForkJoinPool-1-worker-1] o.s.g.c.ScatteredArcsORCGraph - Exception 2 in thread 45 (28): java.lang.ArrayIndexOutOfBoundsException
2023-08-19 04:05:58,441 27209366 ERROR [main] o.s.g.c.ScatteredArcsORCGraph - Exception 3: java.util.concurrent.ExecutionException: java.lang.ArrayIndexOutOfBoundsException
java.util.concurrent.ExecutionException: java.lang.ArrayIndexOutOfBoundsException
at java.base/java.util.concurrent.ForkJoinTask.reportExecutionException(ForkJoinTask.java:605)
at java.base/java.util.concurrent.ForkJoinTask.get(ForkJoinTask.java:981)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.<init>(ScatteredArcsORCGraph.java:147)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.main(ScatteredArcsORCGraph.java:294)
Caused by: java.lang.ArrayIndexOutOfBoundsException
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:564)
at java.base/java.util.concurrent.ForkJoinTask.reportExecutionException(ForkJoinTask.java:604)
... 3 more
Caused by: java.lang.ArrayIndexOutOfBoundsException
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:564)
at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:591)
at java.base/java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:689)
at java.base/java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:159)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateParallel(ForEachOps.java:173)
at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
at java.base/java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:596)
at java.base/java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:765)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcTable(ORCGraphDataset.java:259)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.readEdges(ORCGraphDataset.java:481)
at org.softwareheritage.graph.compress.ORCGraphDataset.readEdges(ORCGraphDataset.java:183)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$2(ScatteredArcsORCGraph.java:93)
at java.base/java.util.concurrent.ForkJoinTask$AdaptedRunnableAction.exec(ForkJoinTask.java:1375)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1182)
at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1655)
at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1622)
at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:165)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 45639270 out of bounds for length 45639270
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$1(ScatteredArcsORCGraph.java:113)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.lambda$readEdges$0(ORCGraphDataset.java:505)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcFile(ORCGraphDataset.java:283)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.lambda$readOrcTable$0(ORCGraphDataset.java:261)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625)
at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinPool.helpJoin(ForkJoinPool.java:1883)
at java.base/java.util.concurrent.ForkJoinTask.awaitDone(ForkJoinTask.java:440)
at java.base/java.util.concurrent.ForkJoinTask.invokeAll(ForkJoinTask.java:721)
at it.unimi.dsi.fastutil.longs.LongArrays$ForkJoinQuickSort2.compute(LongArrays.java:1305)
at java.base/java.util.concurrent.RecursiveAction.exec(RecursiveAction.java:194)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinTask.awaitDone(ForkJoinTask.java:436)
at java.base/java.util.concurrent.ForkJoinTask.joinForPoolInvoke(ForkJoinTask.java:1040)
at java.base/java.util.concurrent.ForkJoinPool.invoke(ForkJoinPool.java:2639)
at it.unimi.dsi.fastutil.longs.LongArrays.parallelQuickSort(LongArrays.java:1335)
at it.unimi.dsi.big.webgraph.Transform.processBatch(Transform.java:880)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$1(ScatteredArcsORCGraph.java:119)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.lambda$readEdges$0(ORCGraphDataset.java:505)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcFile(ORCGraphDataset.java:283)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.lambda$readOrcTable$0(ORCGraphDataset.java:261)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625)
at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
... 5 more
Exception in thread "main" java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.ArrayIndexOutOfBoundsException
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.<init>(ScatteredArcsORCGraph.java:151)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.main(ScatteredArcsORCGraph.java:294)
Caused by: java.util.concurrent.ExecutionException: java.lang.ArrayIndexOutOfBoundsException
at java.base/java.util.concurrent.ForkJoinTask.reportExecutionException(ForkJoinTask.java:605)
at java.base/java.util.concurrent.ForkJoinTask.get(ForkJoinTask.java:981)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.<init>(ScatteredArcsORCGraph.java:147)
... 1 more
Caused by: java.lang.ArrayIndexOutOfBoundsException
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:564)
at java.base/java.util.concurrent.ForkJoinTask.reportExecutionException(ForkJoinTask.java:604)
... 3 more
Caused by: java.lang.ArrayIndexOutOfBoundsException
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at java.base/java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:564)
at java.base/java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:591)
at java.base/java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:689)
at java.base/java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:159)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateParallel(ForEachOps.java:173)
at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
at java.base/java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:596)
at java.base/java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:765)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcTable(ORCGraphDataset.java:259)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.readEdges(ORCGraphDataset.java:481)
at org.softwareheritage.graph.compress.ORCGraphDataset.readEdges(ORCGraphDataset.java:183)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$2(ScatteredArcsORCGraph.java:93)
at java.base/java.util.concurrent.ForkJoinTask$AdaptedRunnableAction.exec(ForkJoinTask.java:1375)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1182)
at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1655)
at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1622)
at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:165)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 45639270 out of bounds for length 45639270
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$1(ScatteredArcsORCGraph.java:113)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.lambda$readEdges$0(ORCGraphDataset.java:505)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcFile(ORCGraphDataset.java:283)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.lambda$readOrcTable$0(ORCGraphDataset.java:261)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625)
at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinPool.helpJoin(ForkJoinPool.java:1883)
at java.base/java.util.concurrent.ForkJoinTask.awaitDone(ForkJoinTask.java:440)
at java.base/java.util.concurrent.ForkJoinTask.invokeAll(ForkJoinTask.java:721)
at it.unimi.dsi.fastutil.longs.LongArrays$ForkJoinQuickSort2.compute(LongArrays.java:1305)
at java.base/java.util.concurrent.RecursiveAction.exec(RecursiveAction.java:194)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373)
at java.base/java.util.concurrent.ForkJoinTask.awaitDone(ForkJoinTask.java:436)
at java.base/java.util.concurrent.ForkJoinTask.joinForPoolInvoke(ForkJoinTask.java:1040)
at java.base/java.util.concurrent.ForkJoinPool.invoke(ForkJoinPool.java:2639)
at it.unimi.dsi.fastutil.longs.LongArrays.parallelQuickSort(LongArrays.java:1335)
at it.unimi.dsi.big.webgraph.Transform.processBatch(Transform.java:880)
at org.softwareheritage.graph.compress.ScatteredArcsORCGraph.lambda$new$1(ScatteredArcsORCGraph.java:119)
at org.softwareheritage.graph.compress.ORCGraphDataset$DirectoryEntryOrcTable.lambda$readEdges$0(ORCGraphDataset.java:505)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.readOrcFile(ORCGraphDataset.java:283)
at org.softwareheritage.graph.compress.ORCGraphDataset$ORCTable.lambda$readOrcTable$0(ORCGraphDataset.java:261)
at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625)
at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)
... 5 more
2023-08-19 04:05:58,475 27209400 INFO [ForkJoinPool-1-worker-1] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-88f18499-ab9e-4425-ad80-b7c3cf24aa58.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:58,475 27209400 INFO [ForkJoinPool-1-worker-1] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:59,847 27210772 INFO [ForkJoinPool-1-worker-96] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-d38ca8ba-122c-4d1a-b537-303a6be5ac6a.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:59,848 27210773 INFO [ForkJoinPool-1-worker-96] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:05:59,945 27210870 INFO [ForkJoinPool-1-worker-74] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-69d8637c-129f-4cff-b321-ca2854f9e6be.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:05:59,945 27210870 INFO [ForkJoinPool-1-worker-74] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:06:00,037 27210962 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) done processing batch.
2023-08-19 04:06:00,037 27210962 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) writing at idx = 0
2023-08-19 04:06:00,037 27210962 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) writing at idx = 1
2023-08-19 04:06:00,037 27210962 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - Thread 56 (39) writing at idx = 2
2023-08-19 04:06:00,157 27211082 INFO [ForkJoinPool-1-worker-96] o.s.g.c.ScatteredArcsORCGraph - Thread 94 (125) writing at idx = 45639270
2023-08-19 04:06:00,441 27211366 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) done processing batch.
2023-08-19 04:06:00,442 27211367 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) writing at idx = 0
2023-08-19 04:06:00,442 27211367 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) writing at idx = 1
2023-08-19 04:06:00,442 27211367 INFO [ForkJoinPool-1-worker-41] o.s.g.c.ScatteredArcsORCGraph - Thread 90 (69) writing at idx = 2
2023-08-19 04:06:00,855 27211780 INFO [ForkJoinPool-1-worker-74] o.s.g.c.ScatteredArcsORCGraph - Thread 86 (103) writing at idx = 45639270
2023-08-19 04:06:00,978 27211903 INFO [ForkJoinPool-1-worker-21] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-17c0d338-ae36-4862-9e34-2e4ba637b9fb.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:06:00,978 27211903 INFO [ForkJoinPool-1-worker-21] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:06:01,435 27212360 INFO [ForkJoinPool-1-worker-21] o.s.g.c.ScatteredArcsORCGraph - Thread 54 (48) writing at idx = 45639270
2023-08-19 04:06:01,703 27212628 INFO [ForkJoinPool-1-worker-21] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-7a71611e-c515-4510-9f1b-601086dd2184.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:06:01,703 27212628 INFO [ForkJoinPool-1-worker-21] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:06:01,751 27212676 INFO [ForkJoinPool-1-worker-74] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-e454ff9d-1487-42c5-84db-e2512bab159b.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:06:01,751 27212676 INFO [ForkJoinPool-1-worker-74] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:06:01,825 27212750 INFO [ForkJoinPool-1-worker-21] o.s.g.c.ScatteredArcsORCGraph - Thread 54 (48) writing at idx = 45639271
2023-08-19 04:06:01,827 27212752 INFO [ForkJoinPool-1-worker-87] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-35c67334-054f-4e5f-b99f-6fc80608ec1c.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:06:01,827 27212752 INFO [ForkJoinPool-1-worker-87] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:06:01,937 27212862 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) done processing batch.
2023-08-19 04:06:01,938 27212863 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) writing at idx = 0
2023-08-19 04:06:01,938 27212863 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) writing at idx = 1
2023-08-19 04:06:01,938 27212863 INFO [ForkJoinPool-1-worker-68] o.s.g.c.ScatteredArcsORCGraph - Thread 36 (96) writing at idx = 2
2023-08-19 04:06:02,073 27212998 INFO [ForkJoinPool-1-worker-74] o.s.g.c.ScatteredArcsORCGraph - Thread 86 (103) writing at idx = 45639271
2023-08-19 04:06:02,231 27213156 INFO [ForkJoinPool-1-worker-21] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-3cc8b979-a553-4617-a699-5fbefeb3aa65.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:06:02,231 27213156 INFO [ForkJoinPool-1-worker-21] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:06:02,249 27213174 INFO [ForkJoinPool-1-worker-10] o.a.o.i.ReaderImpl - Reading ORC rows from /srv/softwareheritage/ssd/datasets/2023-08-07/orc/directory_entry/directory_entry-28ad5b86-edd6-4ff2-ab9d-4d482f79ae6b.orc with {include: [false, true, true, true, true, true], offset: 0, length: 9223372036854775807, includeAcidColumns: true, allowSARGToFilter: false, useSelected: false}
2023-08-19 04:06:02,250 27213175 INFO [ForkJoinPool-1-worker-10] o.a.o.i.RecordReaderImpl - Reader schema not provided -- using file schema struct<directory_id:string,name:binary,type:string,target:string,perms:int>
2023-08-19 04:06:02,481 27213406 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) done processing batch.
2023-08-19 04:06:02,481 27213406 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) writing at idx = 0
2023-08-19 04:06:02,481 27213406 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) writing at idx = 1
2023-08-19 04:06:02,481 27213406 INFO [ForkJoinPool-1-worker-20] o.s.g.c.ScatteredArcsORCGraph - Thread 61 (47) writing at idx = 2
2023-08-19 04:06:02,530 27213455 INFO [ForkJoinPool-1-worker-21] o.s.g.c.ScatteredArcsORCGraph - Thread 54 (48) writing at idx = 45639272
2023-08-19 04:06:02,769 27213694 INFO [ForkJoinPool-1-worker-10] o.s.g.c.ScatteredArcsORCGraph - Thread 49 (37) writing at idx = 45639270
2023-08-19 04:06:02,990 27213915 INFO [ForkJoinPool-1-worker-40] o.s.g.c.ScatteredArcsORCGraph - 369,013,145,501 arcs, 7h 32m 4s, 13,604,581.03 arcs/s, 73.50 ns/arc
2023-08-19 04:06:03,159 27214084 INFO [ForkJoinPool-1-worker-87] o.s.g.c.ScatteredArcsORCGraph - Thread 92 (116) writing at idx = 45639270
2023-08-19 04:06:08,318 27219243 INFO [ForkJoinPool-1-worker-38] o.s.g.c.ScatteredArcsORCGraph - Thread 13 (65) writing at idx = 45639267
2023-08-19 04:06:08,318 27219243 INFO [ForkJoinPool-1-worker-38] o.s.g.c.ScatteredArcsORCGraph - Thread 13 (65) writing at idx = 45639268
2023-08-19 04:06:08,318 27219243 INFO [ForkJoinPool-1-worker-38] o.s.g.c.ScatteredArcsORCGraph - Thread 13 (65) writing at idx = 45639269
2023-08-19 04:06:08,318 27219243 INFO [ForkJoinPool-1-worker-38] o.s.g.c.ScatteredArcsORCGraph - Thread 13 (65) processing batch...
2023-08-19 04:06:12,990 27223915 INFO [ForkJoinPool-1-worker-67] o.s.g.c.ScatteredArcsORCGraph - 369,171,144,342 arcs, 7h 32m 14s, 13,605,390.08 arcs/s, 73.50 ns/arc
2023-08-19 04:06:14,078 27225003 INFO [ForkJoinPool-1-worker-59] o.s.g.c.ScatteredArcsORCGraph - Thread 67 (87) writing at idx = 45639267
2023-08-19 04:06:14,093 27225018 INFO [ForkJoinPool-1-worker-59] o.s.g.c.ScatteredArcsORCGraph - Thread 67 (87) writing at idx = 45639268
2023-08-19 04:06:14,094 27225019 INFO [ForkJoinPool-1-worker-59] o.s.g.c.ScatteredArcsORCGraph - Thread 67 (87) writing at idx = 45639269
2023-08-19 04:06:14,094 27225019 INFO [ForkJoinPool-1-worker-59] o.s.g.c.ScatteredArcsORCGraph - Thread 67 (87) processing batch...
2023-08-19 04:06:15,993 27226918 INFO [ForkJoinPool-1-worker-28] o.s.g.c.ScatteredArcsORCGraph - Thread 40 (55) writing at idx = 45639267
2023-08-19 04:06:15,993 27226918 INFO [ForkJoinPool-1-worker-28] o.s.g.c.ScatteredArcsORCGraph - Thread 40 (55) writing at idx = 45639268
2023-08-19 04:06:15,993 27226918 INFO [ForkJoinPool-1-worker-28] o.s.g.c.ScatteredArcsORCGraph - Thread 40 (55) writing at idx = 45639269
2023-08-19 04:06:15,993 27226918 INFO [ForkJoinPool-1-worker-28] o.s.g.c.ScatteredArcsORCGraph - Thread 40 (55) processing batch...
2023-08-19 04:06:17,454 27228379 INFO [ForkJoinPool-1-worker-84] o.s.g.c.ScatteredArcsORCGraph - Thread 16 (113) writing at idx = 45639267
2023-08-19 04:06:17,454 27228379 INFO [ForkJoinPool-1-worker-84] o.s.g.c.ScatteredArcsORCGraph - Thread 16 (113) writing at idx = 45639268
2023-08-19 04:06:17,454 27228379 INFO [ForkJoinPool-1-worker-84] o.s.g.c.ScatteredArcsORCGraph - Thread 16 (113) writing at idx = 45639269
2023-08-19 04:06:17,456 27228381 INFO [ForkJoinPool-1-worker-84] o.s.g.c.ScatteredArcsORCGraph - Thread 16 (113) processing batch...
2023-08-19 04:06:17,465 27228390 INFO [ForkJoinPool-1-worker-50] o.s.g.c.ScatteredArcsORCGraph - Thread 6 (78) writing at idx = 45639267
2023-08-19 04:06:17,467 27228392 INFO [ForkJoinPool-1-worker-50] o.s.g.c.ScatteredArcsORCGraph - Thread 6 (78) writing at idx = 45639268
2023-08-19 04:06:17,467 27228392 INFO [ForkJoinPool-1-worker-50] o.s.g.c.ScatteredArcsORCGraph - Thread 6 (78) writing at idx = 45639269
2023-08-19 04:06:17,467 27228392 INFO [ForkJoinPool-1-worker-50] o.s.g.c.ScatteredArcsORCGraph - Thread 6 (78) processing batch...
2023-08-19 04:06:17,799 27228724 INFO [ForkJoinPool-1-worker-73] o.s.g.c.ScatteredArcsORCGraph - Thread 70 (102) writing at idx = 45639267
2023-08-19 04:06:17,800 27228725 INFO [ForkJoinPool-1-worker-73] o.s.g.c.ScatteredArcsORCGraph - Thread 70 (102) writing at idx = 45639268
2023-08-19 04:06:17,800 27228725 INFO [ForkJoinPool-1-worker-73] o.s.g.c.ScatteredArcsORCGraph - Thread 70 (102) writing at idx = 45639269
2023-08-19 04:06:17,800 27228725 INFO [ForkJoinPool-1-worker-73] o.s.g.c.ScatteredArcsORCGraph - Thread 70 (102) processing batch...
2023-08-19 04:06:22,990 27233915 INFO [ForkJoinPool-1-worker-12] o.s.g.c.ScatteredArcsORCGraph - 369,331,539,577 arcs, 7h 32m 24s, 13,606,286.83 arcs/s, 73.50 ns/arc
2023-08-19 04:06:25,013 27235938 INFO [ForkJoinPool-1-worker-90] o.s.g.c.ScatteredArcsORCGraph - Thread 55 (119) writing at idx = 45639267
2023-08-19 04:06:25,021 27235946 INFO [ForkJoinPool-1-worker-90] o.s.g.c.ScatteredArcsORCGraph - Thread 55 (119) writing at idx = 45639268
2023-08-19 04:06:25,021 27235946 INFO [ForkJoinPool-1-worker-90] o.s.g.c.ScatteredArcsORCGraph - Thread 55 (119) writing at idx = 45639269
2023-08-19 04:06:25,022 27235947 INFO [ForkJoinPool-1-worker-90] o.s.g.c.ScatteredArcsORCGraph - Thread 55 (119) processing batch...
I am enable to reproduce this on small examples.
After ruling every other option, I believe this is a bug in the JVM's JIT compiler. This is compatible with the fact that this compression is compiled with and ran on OpenJDK 17, which previous ones ran was on OpenJDK 11.