684 lines
22 KiB
Go
684 lines
22 KiB
Go
// Copyright 2020 The Chromium OS Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
// ppdTool is a command line tool that can:
|
|
// * download all PPD files from the database kept on the SCS server;
|
|
// * cluster given set of PPD files and return a minimal subset of PPDs that
|
|
// represents resultant clusters. This is useful for choosing a subset of
|
|
// PPD files for testing.
|
|
//
|
|
// The tool can be run with the command:
|
|
// go run ppdTool.go
|
|
// Use -h parameter to print some help and list of accepted parameters.
|
|
//
|
|
// The tool can be also compiled to the binary file with the following command:
|
|
// go build ppdTool.go
|
|
|
|
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"compress/gzip"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// downloadFile performs an HTTP GET request for the given URL and returns a
// reader with the response body. The caller is responsible for closing the
// returned reader. The program is terminated when the request fails or when
// the server responds with a status code different than 200.
func downloadFile(url string) io.ReadCloser {
	resp, err := http.Get(url)
	if err != nil {
		log.Fatalf("Cannot HTTP GET the file %s: %s.\n", url, err)
	}
	if resp.StatusCode != http.StatusOK {
		// The body must be released before terminating the program.
		resp.Body.Close()
		log.Fatalf("HTTP GET for the file %s returned status code %d.\n", url, resp.StatusCode)
	}
	return resp.Body
}
|
|
|
|
// downloadFilenamesFromPPDIndex retrieves from the index a list of all PPD
|
|
// files. Returned PPD filenames are sorted and unique. In case of an error
|
|
// the function terminates the program.
|
|
func downloadFilenamesFromPPDIndex() []string {
|
|
const urlMetadata = "https://printerconfigurations.googleusercontent.com/chromeos_printing/metadata_v3/"
|
|
|
|
output := make(map[string]bool)
|
|
for i := 0; i < 20; i++ {
|
|
// Calculate a URL of the index file.
|
|
urlPPDIndex := fmt.Sprintf("%sindex-%02d.json", urlMetadata, i)
|
|
// Download and parse the index file.
|
|
respBody := downloadFile(urlPPDIndex)
|
|
defer respBody.Close()
|
|
body, err := ioutil.ReadAll(respBody)
|
|
if err != nil {
|
|
log.Fatalf("Cannot read the content of %s: %s.\n", urlPPDIndex, err)
|
|
}
|
|
// Parse the json structure and extract PPD filenames.
|
|
type jsonName struct {
|
|
Name string `json:"name"`
|
|
}
|
|
type jsonMetadata struct {
|
|
PPDMetadata []jsonName `json:"ppdMetadata"`
|
|
}
|
|
type jsonPrinters struct {
|
|
PPDIndex map[string]jsonMetadata `json:"ppdIndex"`
|
|
}
|
|
var data jsonPrinters
|
|
if err = json.Unmarshal(body, &data); err != nil {
|
|
log.Fatalf("Cannot parse the content of %s: %s.\n", urlPPDIndex, err)
|
|
}
|
|
for _, entry := range data.PPDIndex {
|
|
for _, element := range entry.PPDMetadata {
|
|
output[element.Name] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sort filenames.
|
|
results := make([]string, 0, len(output))
|
|
for filename := range output {
|
|
results = append(results, filename)
|
|
}
|
|
sort.Strings(results)
|
|
|
|
return results
|
|
}
|
|
|
|
// listFilenamesFromDirectory returns the names of all non-directory entries
// found in the given directory. In case of an error the function terminates
// the program.
func listFilenamesFromDirectory(path string) []string {
	entries, err := ioutil.ReadDir(path)
	if err != nil {
		log.Fatalf("Cannot open the directory %s: %s.\n", path, err)
	}
	filenames := make([]string, 0, len(entries))
	for _, entry := range entries {
		// Subdirectories are skipped; only files are reported.
		if entry.IsDir() {
			continue
		}
		filenames = append(filenames, entry.Name())
	}
	return filenames
}
|
|
|
|
// Statement represents a single statement from a PPD file.
type Statement struct {
	keyword string
	option  string
	value   string
}

// PPD represents a content of a single PPD file as an array of Statements.
// The field name holds the filename of the PPD file while the field
// originalDataSize holds the initial size of the field data.
type PPD struct {
	name             string
	data             []Statement
	originalDataSize int
}

// Patterns matching the four kinds of lines that may start a statement's
// definition: a comment, a "*Keyword Option: Value" statement, a bare
// "*Keyword" statement, and a line with white characters only.
var reComment = regexp.MustCompile(`^\*[ \t]*%`)
var reKeywordOptionValue = regexp.MustCompile(`^\*[ \t]*([^: \t]+)([ \t]+[^:]+)?[ \t]*:[ \t]*([^ \t].*)?$`)
var reKeywordOnly = regexp.MustCompile(`^\*[ \t]*([^: \t]+)[ \t]*$`)
var reEmptyLine = regexp.MustCompile(`^[ \t]*$`)

// parseLine parses a single line from PPD file. The line is supposed to be
// the first line of statement's definition. For a comment or a line with
// white characters only the function succeeds (ok == true) and returns an
// empty Statement (st.keyword == "").
func parseLine(line string) (st Statement, ok bool) {
	switch {
	case reComment.MatchString(line):
		// A comment; succeed with an empty Statement.
	case reEmptyLine.MatchString(line):
		// White characters only; succeed with an empty Statement.
	default:
		if m := reKeywordOptionValue.FindStringSubmatch(line); m != nil {
			st.keyword, st.option, st.value = m[1], m[2], m[3]
		} else if m := reKeywordOnly.FindStringSubmatch(line); m != nil {
			st.keyword = m[1]
		} else {
			return st, false
		}
	}
	return st, true
}

// ParsePPD parses a content of a PPD file. The parameter name is the filename
// of the PPD file (the source of the content).
func ParsePPD(name string, content []byte) (PPD, error) {
	ppd := PPD{name: name, data: make([]Statement, 0, 512)}
	scanner := bufio.NewScanner(bytes.NewReader(content))
	inMultilineValue := false
	for lineNo := 1; scanner.Scan(); lineNo++ {
		line := scanner.Text()
		if inMultilineValue {
			// Continuation of a multiline value: append the whole line to
			// the value of the most recent statement. An odd number of "
			// characters on the line closes the value.
			last := &ppd.data[len(ppd.data)-1]
			last.value += "\n" + line
			inMultilineValue = strings.Count(line, `"`)%2 == 0
			continue
		}
		st, ok := parseLine(line)
		if !ok {
			return ppd, fmt.Errorf("Cannot parse line %d: %s", lineNo, line)
		}
		if st.keyword == "" {
			// A comment or an empty line.
			continue
		}
		ppd.data = append(ppd.data, st)
		// An unmatched " in the value opens a multiline value.
		inMultilineValue = strings.Count(st.value, `"`)%2 != 0
	}
	ppd.originalDataSize = len(ppd.data)
	return ppd, scanner.Err()
}
|
|
|
|
// reWhiteSpaces matches a run of spaces and/or tabulators.
var reWhiteSpaces = regexp.MustCompile(`[ \t]+`)

// normalizeSpacesAndTabs canonicalizes white space in the given string:
// leading and trailing white space is removed, and every internal run of
// spaces and tabulators is collapsed into a single space.
func normalizeSpacesAndTabs(str *string) {
	trimmed := strings.TrimSpace(*str)
	*str = reWhiteSpaces.ReplaceAllString(trimmed, " ")
}
|
|
|
|
// keywordsToRemove is the set of PPD keywords whose statements are dropped
// before PPDs are compared. They carry metadata (names, versions, IDs,
// language lists) or installable-option defaults that may differ between
// otherwise equivalent PPD files.
var keywordsToRemove = map[string]bool{
	"1284DeviceID":           true,
	"cupsLanguages":          true,
	"cupsVersion":            true,
	"DefaultDocCutType":      true,
	"DefaultInstalledMemory": true,
	"DefaultPageCutType":     true,
	"DocCutType":             true,
	"driverUrl":              true,
	"End":                    true,
	"FileVersion":            true,
	"FoomaticIDs":            true,
	"InstalledMemory":        true,
	"Manufacturer":           true,
	"ModelName":              true,
	"NickName":               true,
	"PageCutType":            true,
	"PCFileName":             true,
	"Product":                true,
	"ShortNickName":          true,
	"Throughput":             true}
|
|
|
|
// shortLang matches keywords prefixed with a two-letter language code (e.g.
// "de.Translation"); longLang matches keywords prefixed with a full locale
// (e.g. "fr_CA.Translation"). Statements with such keywords are localized
// duplicates and are removed during normalization.
var shortLang = regexp.MustCompile(`^[a-z][a-z]\.`)
var longLang = regexp.MustCompile(`^[a-z][a-z]_[A-Za-z][A-Za-z]\.`)
|
|
|
|
// normalizePPD processes the given PPD content to make it suitable for
|
|
// comparison with other PPDs. The PPD may be no longer valid after this
|
|
// transformation. The following operations are performed on the PPD:
|
|
// * all statements with keyword included in the global variable
|
|
// keywordsToRemove are removed;
|
|
// * all statements with keyword with prefix matching ^[a-z][a-z]\. or
|
|
// ^[a-z][a-z]_[A-Za-z][A-Za-z]\. are removed (like *pl.MediaType,
|
|
// *de.Translation, *fr_CA.Translation, *zh_TW.MediaType, etc.);
|
|
// * subsequences of white spaces in all statements are normalized with
|
|
// the use of normalizeSpacesAndTabs(...)
|
|
func normalizePPD(ppd *PPD) {
|
|
newData := make([]Statement, 0, len(ppd.data))
|
|
for _, s := range ppd.data {
|
|
if keywordsToRemove[s.keyword] {
|
|
continue
|
|
}
|
|
if shortLang.MatchString(s.keyword) || longLang.MatchString(s.keyword) {
|
|
continue
|
|
}
|
|
normalizeSpacesAndTabs(&s.option)
|
|
normalizeSpacesAndTabs(&s.value)
|
|
newData = append(newData, s)
|
|
}
|
|
ppd.data = newData
|
|
}
|
|
|
|
// parseAndNormalizePPDFile reads the content of a PPD file from the given
// reader and parses it. The content is also normalized with the normalizePPD
// function. In case of an error the function terminates the program.
func parseAndNormalizePPDFile(reader io.ReadCloser, filename string) PPD {
	// Decompress the content if needed (filenames ending with ".gz").
	if strings.HasSuffix(filename, ".gz") {
		// This defer captures the original (compressed) reader, because the
		// receiver of Close is evaluated when the defer statement executes.
		// The second defer below then closes the gzip reader assigned to
		// the same variable, so both readers get closed.
		defer reader.Close()
		decomp, err := gzip.NewReader(reader)
		if err != nil {
			log.Fatalf("Error when decompressing the file %s: %s.\n", filename, err)
		}
		reader = decomp
	}
	defer reader.Close()
	content, err := ioutil.ReadAll(reader)
	if err != nil {
		log.Fatalf("Error when reading a content of the file %s: %s.\n", filename, err)
	}
	ppd, err := ParsePPD(filename, content)
	if err != nil {
		log.Fatalf("Error when parsing a content of the file %s: %s.\n", filename, err)
	}
	normalizePPD(&ppd)
	return ppd
}
|
|
|
|
// checkNotExists terminates the program when anything exists at the given
// path (or when the path cannot be checked). It returns normally only when
// Stat reports that the path does not exist.
func checkNotExists(path string) {
	switch _, err := os.Stat(path); {
	case os.IsNotExist(err):
		// Nothing exists at the path; this is the only success case.
		return
	case err == nil:
		log.Fatal("File or directory '" + path + "' already exists.")
	default:
		log.Fatalf("Cannot access '%s': %s.\n", path, err)
	}
}
|
|
|
|
// divideIntoLargeClusters divides the input set of PPDs into clusters of
// PPDs that declare the same set of filter/driver statements (the same
// printing pipeline). The output slice contains the resultant clusters saved
// as lists of PPD names; every cluster is sorted by originalDataSize
// (decreasing) and then by name, and the clusters themselves are sorted by
// the name of their first PPD.
func divideIntoLargeClusters(ppds []PPD) [][]string {
	// ppdTypeDefinition aggregates the values of the pipeline-defining
	// keywords; it is comparable and used as the grouping key below. PPDs
	// with equal ppdTypeDefinition land in the same cluster.
	type ppdTypeDefinition struct {
		cupsFilter             string
		cupsModelNumber        string
		cupsPreFilter          string
		driverName             string
		driverType             string
		foomaticRIPCommandLine string
	}

	// groups maps a pipeline definition to indexes (into ppds) of its members.
	groups := make(map[ppdTypeDefinition][]int)
	for iPPD, ppd := range ppds {
		// Collect values of the pipeline-defining keywords. For cupsFilter*,
		// cupsModelNumber, cupsPreFilter and FoomaticRIPCommandLine the data
		// is in the statement value; for driverName/driverType it is in the
		// statement option.
		chosenKeywords := make(map[string][]string)
		for _, st := range ppd.data {
			switch st.keyword {
			case "cupsFilter", "cupsFilter2", "cupsModelNumber", "cupsPreFilter", "FoomaticRIPCommandLine":
				chosenKeywords[st.keyword] = append(chosenKeywords[st.keyword], st.value)
			case "driverName", "driverType":
				chosenKeywords[st.keyword] = append(chosenKeywords[st.keyword], st.option)
			}
		}
		// When cupsFilter2 statements are present they replace cupsFilter.
		if values, ok := chosenKeywords["cupsFilter2"]; ok {
			chosenKeywords["cupsFilter"] = values
			delete(chosenKeywords, "cupsFilter2")
		}
		// Build the grouping key. Values are sorted first, so the key does
		// not depend on the order of statements inside the PPD file.
		var hash ppdTypeDefinition
		for keyword, values := range chosenKeywords {
			sort.Slice(values, func(i, j int) bool { return values[i] < values[j] })
			switch keyword {
			case "cupsFilter":
				hash.cupsFilter = strings.Join(values, " | ")
			case "cupsModelNumber":
				hash.cupsModelNumber = strings.Join(values, " | ")
			case "cupsPreFilter":
				hash.cupsPreFilter = strings.Join(values, " | ")
			case "driverName":
				hash.driverName = strings.Join(values, " | ")
			case "driverType":
				hash.driverType = strings.Join(values, " | ")
			case "FoomaticRIPCommandLine":
				hash.foomaticRIPCommandLine = strings.Join(values, " | ")
			}
		}
		groups[hash] = append(groups[hash], iPPD)
	}

	// Sort every group by originalDataSize(decreasing), name(alphabetically).
	for _, ppdIDs := range groups {
		sort.Slice(ppdIDs, func(i, j int) bool {
			p1 := ppdIDs[i]
			p2 := ppdIDs[j]
			if ppds[p1].originalDataSize == ppds[p2].originalDataSize {
				return ppds[p1].name < ppds[p2].name
			}
			return ppds[p1].originalDataSize > ppds[p2].originalDataSize
		})
	}

	// Convert groups to a slice of slices with names.
	groupsSlice := make([][]string, 0, len(groups))
	for _, group := range groups {
		names := make([]string, len(group))
		for i, iPPD := range group {
			names[i] = ppds[iPPD].name
		}
		groupsSlice = append(groupsSlice, names)
	}

	// Sort clusters by their first name to make the output deterministic
	// (map iteration order is random).
	sort.Slice(groupsSlice, func(i, j int) bool {
		return groupsSlice[i][0] < groupsSlice[j][0]
	})

	return groupsSlice
}
|
|
|
|
// compareSameSizePPDs is a helper function for divideIntoSmallClusters. It
|
|
// divides the set of PPDs into clusters of PPDs with the same data. The input
|
|
// PPDs must have the same size of data field. The function returns resultant
|
|
// clusters as slices with PPDs names.
|
|
func compareSameSizePPDs(ppds []PPD) [][]string {
|
|
// This map holds PPDID->groupID. At the beginning, every PPD is assigned
|
|
// to a one-element group.
|
|
ppdsGroups := make([]int, len(ppds))
|
|
for i := range ppdsGroups {
|
|
ppdsGroups[i] = i
|
|
}
|
|
|
|
// Find PPDs with the same data and assign them to the same group.
|
|
for i1, e1 := range ppds {
|
|
if ppdsGroups[i1] != i1 {
|
|
// This PPD was already assigned.
|
|
continue
|
|
}
|
|
for i2 := i1 + 1; i2 < len(ppds); i2++ {
|
|
e2 := ppds[i2]
|
|
if ppdsGroups[i2] != i2 {
|
|
// This PPD was already assigned.
|
|
continue
|
|
}
|
|
// Compare data.
|
|
match := true
|
|
for ip, s1 := range e1.data {
|
|
s2 := e2.data[ip]
|
|
if s1 != s2 {
|
|
match = false
|
|
break
|
|
}
|
|
}
|
|
if match {
|
|
// Assign i2 to the same group as i1.
|
|
ppdsGroups[i2] = i1
|
|
}
|
|
}
|
|
}
|
|
|
|
// This map contains groupID->[]PPDID.
|
|
groups := make(map[int][]int)
|
|
for iPPD, iGroup := range ppdsGroups {
|
|
groups[iGroup] = append(groups[iGroup], iPPD)
|
|
}
|
|
// Sort every group by originalDataSize(decreasing), name(alphabetically).
|
|
for _, ppdIDs := range groups {
|
|
sort.Slice(ppdIDs, func(i, j int) bool {
|
|
p1 := ppdIDs[i]
|
|
p2 := ppdIDs[j]
|
|
if ppds[p1].originalDataSize == ppds[p2].originalDataSize {
|
|
return ppds[p1].name < ppds[p2].name
|
|
}
|
|
return ppds[p1].originalDataSize > ppds[p2].originalDataSize
|
|
})
|
|
}
|
|
|
|
// Convert groups to a slice of slices with names.
|
|
groupsSlice := make([][]string, 0, len(groups))
|
|
for _, group := range groups {
|
|
names := make([]string, len(group))
|
|
for i, iPPD := range group {
|
|
names[i] = ppds[iPPD].name
|
|
}
|
|
groupsSlice = append(groupsSlice, names)
|
|
}
|
|
|
|
return groupsSlice
|
|
}
|
|
|
|
// divideIntoSmallClusters divides the input set of PPDs into clusters of PPDs
|
|
// with the same content (data). The output slice contains the resultant
|
|
// clusters saved as a list of PPD names.
|
|
func divideIntoSmallClusters(ppds []PPD) [][]string {
|
|
|
|
type ppdHash struct {
|
|
dataSize int
|
|
firstStatement Statement
|
|
middleStatement Statement
|
|
lastStatement Statement
|
|
}
|
|
|
|
ppdsByHash := make(map[ppdHash][]PPD)
|
|
for _, ppd := range ppds {
|
|
var hash ppdHash
|
|
hash.dataSize = len(ppd.data)
|
|
hash.firstStatement = ppd.data[0]
|
|
hash.middleStatement = ppd.data[len(ppd.data)/2]
|
|
hash.lastStatement = ppd.data[len(ppd.data)-1]
|
|
ppdsByHash[hash] = append(ppdsByHash[hash], ppd)
|
|
}
|
|
|
|
chGroups := make(chan [][]string, len(ppdsByHash))
|
|
for _, ppdsToCompare := range ppdsByHash {
|
|
go func(ppdsToCompare []PPD) {
|
|
chGroups <- compareSameSizePPDs(ppdsToCompare)
|
|
}(ppdsToCompare)
|
|
}
|
|
var groups [][]string
|
|
for range ppdsByHash {
|
|
groups = append(groups, <-chGroups...)
|
|
}
|
|
close(chGroups)
|
|
|
|
sort.Slice(groups, func(i, j int) bool {
|
|
return groups[i][0] < groups[j][0]
|
|
})
|
|
|
|
return groups
|
|
}
|
|
|
|
// saveClustersToFile creates a new file at the given path and saves there the
// given list of clusters, one cluster per line with PPD names separated by
// tabulators. In case of any error the function terminates the program.
func saveClustersToFile(clusters [][]string, path string) {
	file, err := os.Create(path)
	if err != nil {
		log.Fatalf("Cannot create a file %s: %s.\n", path, err)
	}
	// Buffer the writes; errors are checked on every write and on the final
	// Flush instead of being silently ignored.
	writer := bufio.NewWriter(file)
	for _, cluster := range clusters {
		if _, err := writer.WriteString(strings.Join(cluster, "\t") + "\n"); err != nil {
			log.Fatalf("Cannot write to the file %s: %s.\n", path, err)
		}
	}
	if err := writer.Flush(); err != nil {
		log.Fatalf("Cannot write to the file %s: %s.\n", path, err)
	}
	if err := file.Close(); err != nil {
		log.Fatalf("Cannot close the file %s: %s.\n", path, err)
	}
}
|
|
|
|
// createDirectoryWithPPDs creates the directory given in the parameter
// pathTrg and populates it with hard links to the listed files taken from
// the directory defined in pathSrc. In case of any error the function
// terminates the program.
func createDirectoryWithPPDs(pathSrc string, filenames []string, pathTrg string) {
	if err := os.MkdirAll(pathTrg, 0755); err != nil {
		log.Fatalf("Cannot create a directory '%s': %s.\n", pathTrg, err)
	}
	for _, filename := range filenames {
		source := filepath.Join(pathSrc, filename)
		target := filepath.Join(pathTrg, filename)
		// Hard links avoid copying the file content.
		if err := os.Link(source, target); err != nil {
			log.Fatalf("Cannot create a hard link %s for the file %s: %s.\n", target, source, err)
		}
	}
}
|
|
|
|
// commandCompare implements the "compare" command: it parses and normalizes
// all PPD files from the input directory, computes the small and the large
// clusterizations and saves the resultant clusters and representative PPD
// files to the output directory. The function terminates the program when
// any of the output artifacts already exists or when any error occurs.
func commandCompare(args []string) {
	// Names of the artifacts created inside the output directory.
	const filenameLargeClusters = "large_clusters.txt"
	const filenameSmallClusters = "small_clusters.txt"
	const dirnameCorePPDs = "ppds_core"
	const dirnameExtPPDs = "ppds_ext"

	flags := flag.NewFlagSet("compare", flag.ExitOnError)
	flagInput := flags.String("input", "ppds_all", "Directory with PPD files.")
	flagOutput := flags.String("output", ".", "Directory to save results. It is created if not exists.")
	flags.Parse(args)

	if len(flags.Args()) > 0 {
		log.Fatal("Unknown parameter. Run with -h or --help to see the list of supported parameters.")
	}

	pathLargeClusters := filepath.Join(*flagOutput, filenameLargeClusters)
	pathSmallClusters := filepath.Join(*flagOutput, filenameSmallClusters)
	pathCorePPDs := filepath.Join(*flagOutput, dirnameCorePPDs)
	pathExtPPDs := filepath.Join(*flagOutput, dirnameExtPPDs)

	// Refuse to overwrite the results of a previous run.
	checkNotExists(pathLargeClusters)
	checkNotExists(pathSmallClusters)
	checkNotExists(pathCorePPDs)
	checkNotExists(pathExtPPDs)

	fmt.Println("Reading a list of PPD files from the directory...")
	filenames := listFilenamesFromDirectory(*flagInput)
	fmt.Printf("Found %d files.\n", len(filenames))

	// Parse and normalize all PPD files in parallel. Every goroutine writes
	// only its own element of ppds, so no synchronization beyond the
	// WaitGroup is needed.
	fmt.Println("Processing all files...")
	ppds := make([]PPD, len(filenames))
	var wg sync.WaitGroup
	for i, filename := range filenames {
		wg.Add(1)
		go func(i int, filename string) {
			defer wg.Done()
			path := filepath.Join(*flagInput, filename)
			reader, err := os.Open(path)
			if err != nil {
				log.Fatalf("Cannot open the file %s: %s.\n", path, err)
			}
			ppds[i] = parseAndNormalizePPDFile(reader, filename)
		}(i, filename)
	}
	wg.Wait()
	fmt.Println("Done.")

	fmt.Println("Calculating small clusters...")
	groupsSmall := divideIntoSmallClusters(ppds)
	fmt.Printf("Done. The number of small clusters: %d.\n", len(groupsSmall))

	fmt.Println("Calculating large clusters...")
	groupsLarge := divideIntoLargeClusters(ppds)
	fmt.Printf("Done. The number of large clusters: %d.\n", len(groupsLarge))

	// Every large cluster is represented by its first PPD (the core set).
	// Every small cluster is represented by its first PPD unless that PPD
	// already belongs to the core set (the ext set).
	filenamesCore := make([]string, 0, len(groupsLarge))
	setFilenameCore := make(map[string]bool)
	for _, group := range groupsLarge {
		filenamesCore = append(filenamesCore, group[0])
		setFilenameCore[group[0]] = true
	}
	filenamesExt := make([]string, 0, len(groupsSmall))
	for _, group := range groupsSmall {
		if !setFilenameCore[group[0]] {
			filenamesExt = append(filenamesExt, group[0])
		}
	}

	// Save results.
	createDirectoryWithPPDs(*flagInput, filenamesCore, pathCorePPDs)
	createDirectoryWithPPDs(*flagInput, filenamesExt, pathExtPPDs)
	saveClustersToFile(groupsSmall, pathSmallClusters)
	saveClustersToFile(groupsLarge, pathLargeClusters)
}
|
|
|
|
// commandDownload implements the "download" command: it fetches the list of
// PPD filenames from the index and downloads every PPD file into the output
// directory using a fixed pool of worker goroutines. The function terminates
// the program in case of any error.
func commandDownload(args []string) {
	// Base URL of the PPD files referenced by the index.
	const urlPPD = "https://printerconfigurations.googleusercontent.com/chromeos_printing/ppds_for_metadata_v3/"
	// Number of concurrent downloads.
	const maxNumberOfParallelDownloads = 4

	flags := flag.NewFlagSet("download", flag.ExitOnError)
	flagOutput := flags.String("output", "ppds_all", "Directory to save PPD files, it cannot exist.")
	flags.Parse(args)

	if len(flags.Args()) > 0 {
		log.Fatal("Unknown parameter. Run with -h or --help to see the list of supported parameters.")
	}
	// The output directory must not exist yet; it is created here.
	checkNotExists(*flagOutput)
	if err := os.MkdirAll(*flagOutput, 0755); err != nil {
		log.Fatalf("Cannot create a directory '%s': %s.\n", *flagOutput, err)
	}

	fmt.Println("Downloading a list of PPD files from the index...")
	filenames := downloadFilenamesFromPPDIndex()
	fmt.Printf("Found %d files.\n", len(filenames))

	// Start the worker pool. Workers receive filenames from chFilenames
	// until the channel is closed; wgEnd waits for all of them to drain.
	fmt.Println("Downloading PPD files...")
	chFilenames := make(chan string)
	var wgEnd sync.WaitGroup
	for i := 0; i < maxNumberOfParallelDownloads; i++ {
		wgEnd.Add(1)
		go func() {
			defer wgEnd.Done()
			for filename := range chFilenames {
				// Download the file and stream it to the output directory.
				reader := downloadFile(urlPPD + filename)
				path := filepath.Join(*flagOutput, filename)
				file, err := os.Create(path)
				if err != nil {
					log.Fatalf("Cannot create file %s on the disk: %s.\n", path, err)
				}
				if _, err = io.Copy(file, reader); err != nil {
					log.Fatalf("Cannot copy the content of the file %s: %s.\n", path, err)
				}
				reader.Close()
				file.Close()
			}
		}()
	}
	// Feed all filenames to the workers, then signal completion by closing
	// the channel and wait until the workers finish.
	for _, filename := range filenames {
		chFilenames <- filename
	}
	close(chFilenames)
	wgEnd.Wait()
	fmt.Println("Done")
}
|
|
|
|
// usageText is the help message printed when the binary is invoked without
// a recognized command.
const usageText = `
The first parameter must be one of the following commands:

download - downloads all PPDs from the index to the given directory.

compare - perform two independent clusterizations on the given set of PPD
files. Two sets of clusters are calculated:
* a set of large clusters where PPD are grouped together by pipeline
types;
* a set of small clusters where PPD are grouped together by their
similarity.
For both results a minimal subsets of representative PPDs are calculated.
In the output directory, the following files and directories are created:
* large_clusters.txt - a file with PPD names grouped in large clusters
* small_clusters.txt - a file with PPD names grouped in small clusters
* ppds_core - a directory with hard links to PPD files representing
large clusters, each cluster is represented by exactly one PPD file.
For the full PPD dataset given on the input, this directory is
supposed to have around ~100 PPD files;
* ppds_ext - a directory with hard links to PPD files representing
small clusters, each cluster is represented by exactly one PPD file.
IF A PPD FILE IS ALREADY PRESENT IN core_ppds IT IS OMITTED. For the
full PPD dataset given on the input, this directory is supposed to
have around ~1500 PPD files minus ~100 PPD files already present in
the core_ppd directory.

Run one of the commands with '-h' or '--help' to get a list of parameters.
`
|
|
|
|
func main() {
|
|
if len(os.Args) < 2 {
|
|
fmt.Println(usageText)
|
|
return
|
|
}
|
|
|
|
switch os.Args[1] {
|
|
case "compare":
|
|
commandCompare(os.Args[2:])
|
|
case "download":
|
|
commandDownload(os.Args[2:])
|
|
default:
|
|
fmt.Println(usageText)
|
|
}
|
|
}
|