/* * Copyright (C) 2017 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package com.google.common.io; import com.google.caliper.BeforeExperiment; import com.google.caliper.Benchmark; import com.google.caliper.Param; import com.google.caliper.api.VmOptions; import com.google.common.base.Optional; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.Charset; import java.util.Random; /** * Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}. */ // These benchmarks allocate a lot of data so use a large heap @VmOptions({"-Xms12g", "-Xmx12g", "-d64"}) public class ByteSourceAsCharSourceReadBenchmark { enum ReadStrategy { TO_BYTE_ARRAY_NEW_STRING { @Override String read(ByteSource byteSource, Charset cs) throws IOException { return new String(byteSource.read(), cs); } }, USING_CHARSTREAMS_COPY { @Override String read(ByteSource byteSource, Charset cs) throws IOException { StringBuilder sb = new StringBuilder(); try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { CharStreams.copy(reader, sb); } return sb.toString(); } }, // It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING. But it just isn't // my best guess is that the jdk authors have spent more time optimizing that callpath than this // one. (StringCoding$StringDecoder vs. StreamDecoder). StringCoding has a ton of special cases // theoretically we could duplicate all that logic here to try to beat 'new String' or at least // come close. USING_DECODER_WITH_SIZE_HINT { @Override String read(ByteSource byteSource, Charset cs) throws IOException { Optional size = byteSource.sizeIfKnown(); // if we know the size and it fits in an int if (size.isPresent() && size.get().longValue() == size.get().intValue()) { // otherwise try to presize a StringBuilder // it is kind of lame that we need to construct a decoder to access this value. // if this is a concern we could add special cases for some known charsets (like utf8) // or we could avoid inputstreamreader and use the decoder api directly // TODO(lukes): in a real implementation we would need to handle overflow conditions int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte()); char[] buffer = new char[maxChars]; int bufIndex = 0; int remaining = buffer.length; try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { int nRead = 0; while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) { bufIndex += nRead; remaining -= nRead; } if (nRead == -1) { // we reached EOF return new String(buffer, 0, bufIndex); } // otherwise we got the size wrong. This can happen if the size changes between when // we called sizeIfKnown and when we started reading the file (or i guess if // maxCharsPerByte is wrong) // Fallback to an incremental approach StringBuilder builder = new StringBuilder(bufIndex + 32); builder.append(buffer, 0, bufIndex); buffer = null; // release for gc CharStreams.copy(reader, builder); return builder.toString(); } } else { return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs); } } }; abstract String read(ByteSource byteSource, Charset cs) throws IOException; } @Param({"UTF-8"}) String charsetName; @Param ReadStrategy strategy; @Param({"10", "1024", "1048576"}) int size; Charset charset; ByteSource data; @BeforeExperiment public void setUp() { charset = Charset.forName(charsetName); StringBuilder sb = new StringBuilder(); Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior sb.ensureCapacity(size); for (int k = 0; k < size; k++) { // [9-127) includes all ascii non-control characters sb.append((char) (random.nextInt(127 - 9) + 9)); } String string = sb.toString(); sb.setLength(0); data = ByteSource.wrap(string.getBytes(charset)); } @Benchmark public int timeCopy(int reps) throws IOException { int r = 0; final Charset localCharset = charset; final ByteSource localData = data; final ReadStrategy localStrategy = strategy; for (int i = 0; i < reps; i++) { r += localStrategy.read(localData, localCharset).hashCode(); } return r; } }