479 lines
12 KiB
Go
479 lines
12 KiB
Go
// Copyright 2020 The Bazel Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package starlarkjson defines utilities for converting Starlark values
|
|
// to/from JSON strings. The most recent IETF standard for JSON is
|
|
// https://www.ietf.org/rfc/rfc7159.txt.
|
|
package starlarkjson // import "go.starlark.net/starlarkjson"
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"math"
|
|
"math/big"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"go.starlark.net/starlark"
|
|
"go.starlark.net/starlarkstruct"
|
|
)
|
|
|
|
// Module json is a Starlark module of JSON-related functions.
|
|
//
|
|
// json = module(
|
|
// encode,
|
|
// decode,
|
|
// indent,
|
|
// )
|
|
//
|
|
// def encode(x):
|
|
//
|
|
// The encode function accepts one required positional argument,
|
|
// which it converts to JSON by cases:
|
|
// - A Starlark value that implements Go's standard json.Marshaler
|
|
// interface defines its own JSON encoding.
|
|
// - None, True, and False are converted to null, true, and false, respectively.
|
|
// - Starlark int values, no matter how large, are encoded as decimal integers.
|
|
// Some decoders may not be able to decode very large integers.
|
|
// - Starlark float values are encoded using decimal point notation,
|
|
// even if the value is an integer.
|
|
// It is an error to encode a non-finite floating-point value.
|
|
// - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
|
|
// - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
|
|
// It is an error if any key is not a string.
|
|
// - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
|
|
// - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
|
|
// If an application-defined type matches more than one of the cases described above,
|
|
// (e.g. it implements both Iterable and HasAttrs), the first case takes precedence.
|
|
// Encoding any other value yields an error.
|
|
//
|
|
// def decode(x):
|
|
//
|
|
// The decode function accepts one positional parameter, a JSON string.
|
|
// It returns the Starlark value that the string denotes.
|
|
// - Numbers are parsed as int or float, depending on whether they
|
|
// contain a decimal point or an exponent.
|
|
// - JSON objects are parsed as new unfrozen Starlark dicts.
|
|
// - JSON arrays are parsed as new unfrozen Starlark lists.
|
|
// Decoding fails if x is not a valid JSON string.
|
|
//
|
|
// def indent(str, *, prefix="", indent="\t"):
|
|
//
|
|
// The indent function pretty-prints a valid JSON encoding,
|
|
// and returns a string containing the indented form.
|
|
// It accepts one required positional parameter, the JSON string,
|
|
// and two optional keyword-only string parameters, prefix and indent,
|
|
// that specify a prefix of each new line, and the unit of indentation.
|
|
//
|
|
var Module = &starlarkstruct.Module{
|
|
Name: "json",
|
|
Members: starlark.StringDict{
|
|
"encode": starlark.NewBuiltin("json.encode", encode),
|
|
"decode": starlark.NewBuiltin("json.decode", decode),
|
|
"indent": starlark.NewBuiltin("json.indent", indent),
|
|
},
|
|
}
|
|
|
|
func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
|
|
var x starlark.Value
|
|
if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
buf := new(bytes.Buffer)
|
|
|
|
var quoteSpace [128]byte
|
|
quote := func(s string) {
|
|
// Non-trivial escaping is handled by Go's encoding/json.
|
|
if isPrintableASCII(s) {
|
|
buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
|
|
} else {
|
|
// TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
|
|
// Can we avoid this call?
|
|
data, _ := json.Marshal(s)
|
|
buf.Write(data)
|
|
}
|
|
}
|
|
|
|
var emit func(x starlark.Value) error
|
|
emit = func(x starlark.Value) error {
|
|
switch x := x.(type) {
|
|
case json.Marshaler:
|
|
// Application-defined starlark.Value types
|
|
// may define their own JSON encoding.
|
|
data, err := x.MarshalJSON()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
buf.Write(data)
|
|
|
|
case starlark.NoneType:
|
|
buf.WriteString("null")
|
|
|
|
case starlark.Bool:
|
|
if x {
|
|
buf.WriteString("true")
|
|
} else {
|
|
buf.WriteString("false")
|
|
}
|
|
|
|
case starlark.Int:
|
|
fmt.Fprint(buf, x)
|
|
|
|
case starlark.Float:
|
|
if !isFinite(float64(x)) {
|
|
return fmt.Errorf("cannot encode non-finite float %v", x)
|
|
}
|
|
fmt.Fprintf(buf, "%g", x) // always contains a decimal point
|
|
|
|
case starlark.String:
|
|
quote(string(x))
|
|
|
|
case starlark.IterableMapping:
|
|
// e.g. dict (must have string keys)
|
|
buf.WriteByte('{')
|
|
items := x.Items()
|
|
for _, item := range items {
|
|
if _, ok := item[0].(starlark.String); !ok {
|
|
return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
|
|
}
|
|
}
|
|
sort.Slice(items, func(i, j int) bool {
|
|
return items[i][0].(starlark.String) < items[j][0].(starlark.String)
|
|
})
|
|
for i, item := range items {
|
|
if i > 0 {
|
|
buf.WriteByte(',')
|
|
}
|
|
k, _ := starlark.AsString(item[0])
|
|
quote(k)
|
|
buf.WriteByte(':')
|
|
if err := emit(item[1]); err != nil {
|
|
return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err)
|
|
}
|
|
}
|
|
buf.WriteByte('}')
|
|
|
|
case starlark.Iterable:
|
|
// e.g. tuple, list
|
|
buf.WriteByte('[')
|
|
iter := x.Iterate()
|
|
defer iter.Done()
|
|
var elem starlark.Value
|
|
for i := 0; iter.Next(&elem); i++ {
|
|
if i > 0 {
|
|
buf.WriteByte(',')
|
|
}
|
|
if err := emit(elem); err != nil {
|
|
return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
|
|
}
|
|
}
|
|
buf.WriteByte(']')
|
|
|
|
case starlark.HasAttrs:
|
|
// e.g. struct
|
|
buf.WriteByte('{')
|
|
var names []string
|
|
names = append(names, x.AttrNames()...)
|
|
sort.Strings(names)
|
|
for i, name := range names {
|
|
v, err := x.Attr(name)
|
|
if err != nil || v == nil {
|
|
log.Fatalf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name)
|
|
}
|
|
if i > 0 {
|
|
buf.WriteByte(',')
|
|
}
|
|
quote(name)
|
|
buf.WriteByte(':')
|
|
if err := emit(v); err != nil {
|
|
return fmt.Errorf("in field .%s: %v", name, err)
|
|
}
|
|
}
|
|
buf.WriteByte('}')
|
|
|
|
default:
|
|
return fmt.Errorf("cannot encode %s as JSON", x.Type())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
if err := emit(x); err != nil {
|
|
return nil, fmt.Errorf("%s: %v", b.Name(), err)
|
|
}
|
|
return starlark.String(buf.String()), nil
|
|
}
|
|
|
|
// isPrintableASCII reports whether s consists solely of printable ASCII
// bytes, that is, bytes in the range 0x20 (space) through 0x7e ('~').
// The empty string is considered printable.
//
// The encoder uses the result to decide whether strconv.AppendQuote may
// stand in for json.Marshal when quoting a string. DEL (0x7f) must
// therefore be excluded along with the other control characters: strconv
// renders it as the Go escape \x7f, which is not a valid JSON escape.
func isPrintableASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if b := s[i]; b < 0x20 || b >= 0x7f {
			return false
		}
	}
	return true
}
|
|
|
|
// isFinite reports whether f is an ordinary finite number, i.e. neither
// a NaN nor an infinity of either sign.
// It is equivalent to !math.IsNaN(f) && !math.IsInf(f, 0).
func isFinite(f float64) bool {
	// Every comparison involving NaN is false, and the infinities lie
	// outside the closed range of representable finite values.
	return -math.MaxFloat64 <= f && f <= math.MaxFloat64
}
|
|
|
|
func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
|
|
prefix, indent := "", "\t" // keyword-only
|
|
if err := starlark.UnpackArgs(b.Name(), nil, kwargs,
|
|
"prefix?", &prefix,
|
|
"indent?", &indent,
|
|
); err != nil {
|
|
return nil, err
|
|
}
|
|
var str string // positional-only
|
|
if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
buf := new(bytes.Buffer)
|
|
if err := json.Indent(buf, []byte(str), prefix, indent); err != nil {
|
|
return nil, fmt.Errorf("%s: %v", b.Name(), err)
|
|
}
|
|
return starlark.String(buf.String()), nil
|
|
}
|
|
|
|
func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (_ starlark.Value, err error) {
|
|
var s string
|
|
if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &s); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// The decoder necessarily makes certain representation choices
|
|
// such as list vs tuple, struct vs dict, int vs float.
|
|
// In principle, we could parameterize it to allow the caller to
|
|
// control the returned types, but there's no compelling need yet.
|
|
|
|
// Use panic/recover with a distinguished type (failure) for error handling.
|
|
type failure string
|
|
fail := func(format string, args ...interface{}) {
|
|
panic(failure(fmt.Sprintf(format, args...)))
|
|
}
|
|
|
|
i := 0
|
|
|
|
// skipSpace consumes leading spaces, and reports whether there is more input.
|
|
skipSpace := func() bool {
|
|
for ; i < len(s); i++ {
|
|
b := s[i]
|
|
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// next consumes leading spaces and returns the first non-space.
|
|
// It panics if at EOF.
|
|
next := func() byte {
|
|
if skipSpace() {
|
|
return s[i]
|
|
}
|
|
fail("unexpected end of file")
|
|
panic("unreachable")
|
|
}
|
|
|
|
// parse returns the next JSON value from the input.
|
|
// It consumes leading but not trailing whitespace.
|
|
// It panics on error.
|
|
var parse func() starlark.Value
|
|
parse = func() starlark.Value {
|
|
b := next()
|
|
switch b {
|
|
case '"':
|
|
// string
|
|
|
|
// Find end of quotation.
|
|
// Also, record whether trivial unquoting is safe.
|
|
// Non-trivial unquoting is handled by Go's encoding/json.
|
|
safe := true
|
|
closed := false
|
|
j := i + 1
|
|
for ; j < len(s); j++ {
|
|
b := s[j]
|
|
if b == '\\' {
|
|
safe = false
|
|
j++ // skip x in \x
|
|
} else if b == '"' {
|
|
closed = true
|
|
j++ // skip '"'
|
|
break
|
|
} else if b >= utf8.RuneSelf {
|
|
safe = false
|
|
}
|
|
}
|
|
if !closed {
|
|
fail("unclosed string literal")
|
|
}
|
|
|
|
r := s[i:j]
|
|
i = j
|
|
|
|
// unquote
|
|
if safe {
|
|
r = r[1 : len(r)-1]
|
|
} else if err := json.Unmarshal([]byte(r), &r); err != nil {
|
|
fail("%s", err)
|
|
}
|
|
return starlark.String(r)
|
|
|
|
case 'n':
|
|
if strings.HasPrefix(s[i:], "null") {
|
|
i += len("null")
|
|
return starlark.None
|
|
}
|
|
|
|
case 't':
|
|
if strings.HasPrefix(s[i:], "true") {
|
|
i += len("true")
|
|
return starlark.True
|
|
}
|
|
|
|
case 'f':
|
|
if strings.HasPrefix(s[i:], "false") {
|
|
i += len("false")
|
|
return starlark.False
|
|
}
|
|
|
|
case '[':
|
|
// array
|
|
var elems []starlark.Value
|
|
|
|
i++ // '['
|
|
b = next()
|
|
if b != ']' {
|
|
for {
|
|
elem := parse()
|
|
elems = append(elems, elem)
|
|
b = next()
|
|
if b != ',' {
|
|
if b != ']' {
|
|
fail("got %q, want ',' or ']'", b)
|
|
}
|
|
break
|
|
}
|
|
i++ // ','
|
|
}
|
|
}
|
|
i++ // ']'
|
|
return starlark.NewList(elems)
|
|
|
|
case '{':
|
|
// object
|
|
dict := new(starlark.Dict)
|
|
|
|
i++ // '{'
|
|
b = next()
|
|
if b != '}' {
|
|
for {
|
|
key := parse()
|
|
if _, ok := key.(starlark.String); !ok {
|
|
fail("got %s for object key, want string", key.Type())
|
|
}
|
|
b = next()
|
|
if b != ':' {
|
|
fail("after object key, got %q, want ':' ", b)
|
|
}
|
|
i++ // ':'
|
|
value := parse()
|
|
dict.SetKey(key, value) // can't fail
|
|
b = next()
|
|
if b != ',' {
|
|
if b != '}' {
|
|
fail("in object, got %q, want ',' or '}'", b)
|
|
}
|
|
break
|
|
}
|
|
i++ // ','
|
|
}
|
|
}
|
|
i++ // '}'
|
|
return dict
|
|
|
|
default:
|
|
// number?
|
|
if isdigit(b) || b == '-' {
|
|
// scan literal. Allow [0-9+-eE.] for now.
|
|
float := false
|
|
var j int
|
|
for j = i + 1; j < len(s); j++ {
|
|
b = s[j]
|
|
if isdigit(b) {
|
|
// ok
|
|
} else if b == '.' ||
|
|
b == 'e' ||
|
|
b == 'E' ||
|
|
b == '+' ||
|
|
b == '-' {
|
|
float = true
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
num := s[i:j]
|
|
i = j
|
|
|
|
// Unlike most C-like languages,
|
|
// JSON disallows a leading zero before a digit.
|
|
digits := num
|
|
if num[0] == '-' {
|
|
digits = num[1:]
|
|
}
|
|
if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) {
|
|
fail("invalid number: %s", num)
|
|
}
|
|
|
|
// parse literal
|
|
if float {
|
|
x, err := strconv.ParseFloat(num, 64)
|
|
if err != nil {
|
|
fail("invalid number: %s", num)
|
|
}
|
|
return starlark.Float(x)
|
|
} else {
|
|
x, ok := new(big.Int).SetString(num, 10)
|
|
if !ok {
|
|
fail("invalid number: %s", num)
|
|
}
|
|
return starlark.MakeBigInt(x)
|
|
}
|
|
}
|
|
}
|
|
fail("unexpected character %q", b)
|
|
panic("unreachable")
|
|
}
|
|
defer func() {
|
|
x := recover()
|
|
switch x := x.(type) {
|
|
case failure:
|
|
err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
|
|
case nil:
|
|
// nop
|
|
default:
|
|
panic(x) // unexpected panic
|
|
}
|
|
}()
|
|
x := parse()
|
|
if skipSpace() {
|
|
fail("unexpected character %q after value", s[i])
|
|
}
|
|
return x, nil
|
|
}
|
|
|
|
// isdigit reports whether b is an ASCII decimal digit, '0' through '9'.
func isdigit(b byte) bool {
	// Byte subtraction wraps around, so any b below '0' yields a value
	// of at least 208 and fails the comparison.
	return b-'0' < 10
}
|