138 lines
5.2 KiB
Java
138 lines
5.2 KiB
Java
/*
 * Copyright (C) 2017 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
|
|
|
|
package com.google.common.io;
|
|
|
|
import com.google.caliper.BeforeExperiment;
|
|
import com.google.caliper.Benchmark;
|
|
import com.google.caliper.Param;
|
|
import com.google.caliper.api.VmOptions;
|
|
import com.google.common.base.Optional;
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.nio.charset.Charset;
|
|
import java.util.Random;
|
|
|
|
/**
|
|
* Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}.
|
|
*/
|
|
// These benchmarks allocate a lot of data so use a large heap
|
|
@VmOptions({"-Xms12g", "-Xmx12g", "-d64"})
|
|
public class ByteSourceAsCharSourceReadBenchmark {
|
|
enum ReadStrategy {
|
|
TO_BYTE_ARRAY_NEW_STRING {
|
|
@Override
|
|
String read(ByteSource byteSource, Charset cs) throws IOException {
|
|
return new String(byteSource.read(), cs);
|
|
}
|
|
},
|
|
USING_CHARSTREAMS_COPY {
|
|
@Override
|
|
String read(ByteSource byteSource, Charset cs) throws IOException {
|
|
StringBuilder sb = new StringBuilder();
|
|
try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
|
|
CharStreams.copy(reader, sb);
|
|
}
|
|
return sb.toString();
|
|
}
|
|
},
|
|
// It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING. But it just isn't
|
|
// my best guess is that the jdk authors have spent more time optimizing that callpath than this
|
|
// one. (StringCoding$StringDecoder vs. StreamDecoder). StringCoding has a ton of special cases
|
|
// theoretically we could duplicate all that logic here to try to beat 'new String' or at least
|
|
// come close.
|
|
USING_DECODER_WITH_SIZE_HINT {
|
|
@Override
|
|
String read(ByteSource byteSource, Charset cs) throws IOException {
|
|
Optional<Long> size = byteSource.sizeIfKnown();
|
|
// if we know the size and it fits in an int
|
|
if (size.isPresent() && size.get().longValue() == size.get().intValue()) {
|
|
// otherwise try to presize a StringBuilder
|
|
// it is kind of lame that we need to construct a decoder to access this value.
|
|
// if this is a concern we could add special cases for some known charsets (like utf8)
|
|
// or we could avoid inputstreamreader and use the decoder api directly
|
|
// TODO(lukes): in a real implementation we would need to handle overflow conditions
|
|
int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte());
|
|
char[] buffer = new char[maxChars];
|
|
int bufIndex = 0;
|
|
int remaining = buffer.length;
|
|
try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) {
|
|
int nRead = 0;
|
|
while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) {
|
|
bufIndex += nRead;
|
|
remaining -= nRead;
|
|
}
|
|
if (nRead == -1) {
|
|
// we reached EOF
|
|
return new String(buffer, 0, bufIndex);
|
|
}
|
|
// otherwise we got the size wrong. This can happen if the size changes between when
|
|
// we called sizeIfKnown and when we started reading the file (or i guess if
|
|
// maxCharsPerByte is wrong)
|
|
// Fallback to an incremental approach
|
|
StringBuilder builder = new StringBuilder(bufIndex + 32);
|
|
builder.append(buffer, 0, bufIndex);
|
|
buffer = null; // release for gc
|
|
CharStreams.copy(reader, builder);
|
|
return builder.toString();
|
|
}
|
|
|
|
} else {
|
|
return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs);
|
|
}
|
|
}
|
|
};
|
|
|
|
abstract String read(ByteSource byteSource, Charset cs) throws IOException;
|
|
}
|
|
|
|
@Param({"UTF-8"})
|
|
String charsetName;
|
|
|
|
@Param ReadStrategy strategy;
|
|
|
|
@Param({"10", "1024", "1048576"})
|
|
int size;
|
|
|
|
Charset charset;
|
|
ByteSource data;
|
|
|
|
@BeforeExperiment
|
|
public void setUp() {
|
|
charset = Charset.forName(charsetName);
|
|
StringBuilder sb = new StringBuilder();
|
|
Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior
|
|
sb.ensureCapacity(size);
|
|
for (int k = 0; k < size; k++) {
|
|
// [9-127) includes all ascii non-control characters
|
|
sb.append((char) (random.nextInt(127 - 9) + 9));
|
|
}
|
|
String string = sb.toString();
|
|
sb.setLength(0);
|
|
data = ByteSource.wrap(string.getBytes(charset));
|
|
}
|
|
|
|
@Benchmark
|
|
public int timeCopy(int reps) throws IOException {
|
|
int r = 0;
|
|
final Charset localCharset = charset;
|
|
final ByteSource localData = data;
|
|
final ReadStrategy localStrategy = strategy;
|
|
for (int i = 0; i < reps; i++) {
|
|
r += localStrategy.read(localData, localCharset).hashCode();
|
|
}
|
|
return r;
|
|
}
|
|
}
|