4010 lines
		
	
	
		
			135 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			4010 lines
		
	
	
		
			135 KiB
		
	
	
	
		
			C++
		
	
	
	
| /* Debuginfo-over-http server.
 | |
|    Copyright (C) 2019-2021 Red Hat, Inc.
 | |
|    This file is part of elfutils.
 | |
| 
 | |
|    This file is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; either version 3 of the License, or
 | |
|    (at your option) any later version.
 | |
| 
 | |
|    elfutils is distributed in the hope that it will be useful, but
 | |
|    WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 | |
| 
 | |
| 
 | |
| /* cargo-cult from libdwfl linux-kernel-modules.c */
 | |
| /* In case we have a bad fts we include this before config.h because it
 | |
|    can't handle _FILE_OFFSET_BITS.
 | |
|    Everything we need here is fine if its declarations just come first.
 | |
|    Also, include sys/types.h before fts. On some systems fts.h is not self
 | |
|    contained. */
 | |
| #ifdef BAD_FTS
 | |
|   #include <sys/types.h>
 | |
|   #include <fts.h>
 | |
| #endif
 | |
| 
 | |
| #ifdef HAVE_CONFIG_H
 | |
|   #include "config.h"
 | |
| #endif
 | |
| 
 | |
| extern "C" {
 | |
| #include "printversion.h"
 | |
| }
 | |
| 
 | |
| #include "debuginfod.h"
 | |
| #include <dwarf.h>
 | |
| #include <system.h>
 | |
| 
 | |
| #include <argp.h>
 | |
| #ifdef __GNUC__
 | |
| #undef __attribute__ /* glibc bug - rhbz 1763325 */
 | |
| #endif
 | |
| 
 | |
| #include <unistd.h>
 | |
| #include <stdlib.h>
 | |
| #include <libintl.h>
 | |
| #include <locale.h>
 | |
| #include <pthread.h>
 | |
| #include <signal.h>
 | |
| #include <sys/stat.h>
 | |
| #include <sys/time.h>
 | |
| #include <sys/vfs.h>
 | |
| #include <unistd.h>
 | |
| #include <fcntl.h>
 | |
| #include <netdb.h>
 | |
| 
 | |
| 
 | |
| /* If fts.h is included before config.h, its indirect inclusions may not
 | |
|    give us the right LFS aliases of these functions, so map them manually.  */
 | |
| #ifdef BAD_FTS
 | |
|   #ifdef _FILE_OFFSET_BITS
 | |
|     #define open open64
 | |
|     #define fopen fopen64
 | |
|   #endif
 | |
| #else
 | |
|   #include <sys/types.h>
 | |
|   #include <fts.h>
 | |
| #endif
 | |
| 
 | |
| #include <cstring>
 | |
| #include <vector>
 | |
| #include <set>
 | |
| #include <map>
 | |
| #include <string>
 | |
| #include <iostream>
 | |
| #include <iomanip>
 | |
| #include <ostream>
 | |
| #include <sstream>
 | |
| #include <mutex>
 | |
| #include <deque>
 | |
| #include <condition_variable>
 | |
| #include <thread>
 | |
| // #include <regex> // on rhel7 gcc 4.8, not competent
 | |
| #include <regex.h>
 | |
| // #include <algorithm>
 | |
| using namespace std;
 | |
| 
 | |
| #include <gelf.h>
 | |
| #include <libdwelf.h>
 | |
| 
 | |
| #include <microhttpd.h>
 | |
| 
 | |
| #if MHD_VERSION >= 0x00097002
 | |
| // libmicrohttpd 0.9.71 broke API
 | |
| #define MHD_RESULT enum MHD_Result
 | |
| #else
 | |
| #define MHD_RESULT int
 | |
| #endif
 | |
| 
 | |
| #include <curl/curl.h>
 | |
| #include <archive.h>
 | |
| #include <archive_entry.h>
 | |
| #include <sqlite3.h>
 | |
| 
 | |
| #ifdef __linux__
 | |
| #include <sys/syscall.h>
 | |
| #endif
 | |
| 
 | |
| #ifdef __linux__
 | |
| #define tid() syscall(SYS_gettid)
 | |
| #else
 | |
| #define tid() pthread_self()
 | |
| #endif
 | |
| 
 | |
| 
 | |
// Report whether HAYSTACK ends with NEEDLE.  An empty NEEDLE matches
// every HAYSTACK; a NEEDLE longer than HAYSTACK never matches.
inline bool
string_endswith(const string& haystack, const string& needle)
{
  const size_t hay_len = haystack.size();
  const size_t tail_len = needle.size();

  if (hay_len < tail_len)
    return false;

  // Compare only the trailing tail_len characters of the haystack.
  return haystack.compare(hay_len - tail_len, tail_len, needle) == 0;
}
 | |
| 
 | |
| 
 | |
// Roll this identifier for every sqlite schema incompatibility.
// It is used as the name prefix of every table and view created below.
#define BUILDIDS "buildids9"

// "without rowid" tables are only requested when the sqlite headers
// being compiled against are new enough to know the clause.
#if SQLITE_VERSION_NUMBER >= 3008000
#define WITHOUT_ROWID "without rowid"
#else
#define WITHOUT_ROWID ""
#endif

// Schema setup text: connection pragmas, the current-generation tables
// and views, then garbage collection of objects left behind by older
// schema generations.
static const char DEBUGINFOD_SQLITE_DDL[] =
  "pragma foreign_keys = on;\n"
  "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
  "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
  "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
  "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
  "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
  "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
  // NB: all these are overridable with -D option

  // Normalization table for interning file names
  "create table if not exists " BUILDIDS "_files (\n"
  "        id integer primary key not null,\n"
  "        name text unique not null\n"
  "        );\n"
  // Normalization table for interning buildids
  "create table if not exists " BUILDIDS "_buildids (\n"
  "        id integer primary key not null,\n"
  "        hex text unique not null);\n"
  // Track the completion of scanning of a given file & sourcetype at given time
  "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
  "        mtime integer not null,\n"
  "        file integer not null,\n"
  "        size integer not null,\n" // in bytes
  "        sourcetype text(1) not null\n"
  "            check (sourcetype IN ('F', 'R')),\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        primary key (file, mtime, sourcetype)\n"
  "        ) " WITHOUT_ROWID ";\n"
  "create table if not exists " BUILDIDS "_f_de (\n"
  "        buildid integer not null,\n"
  "        debuginfo_p integer not null,\n"
  "        executable_p integer not null,\n"
  "        file integer not null,\n"
  "        mtime integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, file, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  "create table if not exists " BUILDIDS "_f_s (\n"
  "        buildid integer not null,\n"
  "        artifactsrc integer not null,\n"
  "        file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
  "        mtime integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, artifactsrc, file, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  "create table if not exists " BUILDIDS "_r_de (\n"
  "        buildid integer not null,\n"
  "        debuginfo_p integer not null,\n"
  "        executable_p integer not null,\n"
  "        file integer not null,\n"
  "        mtime integer not null,\n"
  "        content integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
  "        buildid integer not null,\n"
  "        artifactsrc integer not null,\n"
  "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, artifactsrc)\n"
  "        ) " WITHOUT_ROWID ";\n"
  "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
  "        file integer not null,\n"
  "        mtime integer not null,\n"
  "        content integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        primary key (content, file, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  // create views to glue together some of the above tables, for webapi D queries
  "create view if not exists " BUILDIDS "_query_d as \n"
  "select\n"
  "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
  "union all select\n"
  "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
  ";"
  // ... and for E queries
  "create view if not exists " BUILDIDS "_query_e as \n"
  "select\n"
  "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
  "union all select\n"
  "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
  ";"
  // ... and for S queries
  "create view if not exists " BUILDIDS "_query_s as \n"
  "select\n"
  "        b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n"
  "        where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
  "union all select\n"
  "        b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, "
  "        " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
  "        where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
  "        and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
  ";"
  // and for startup overview counts
  "drop view if exists " BUILDIDS "_stats;\n"
  "create view if not exists " BUILDIDS "_stats as\n"
  "          select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
  "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
  "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
  "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
  "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
  "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
  "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
  "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
  "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
#if SQLITE_VERSION_NUMBER >= 3016000
  "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
#endif
  ";\n"

// schema change history & garbage collection
//
// XXX: we could have migration queries here to bring prior-schema
// data over instead of just dropping it.
//
// buildids9: widen the mtime_scanned table
  "" // <<< we are here
// buildids8: slim the sref table
  // Drop the glue views first so none is left referring to a dropped
  // table.  NOTE(review): assumes schema 8 used the same view names as
  // the current schema; "if exists" makes these no-ops otherwise.
  "drop view if exists buildids8_query_d;\n"
  "drop view if exists buildids8_query_e;\n"
  "drop view if exists buildids8_query_s;\n"
  "drop view if exists buildids8_stats;\n"
  "drop table if exists buildids8_f_de;\n"
  "drop table if exists buildids8_f_s;\n"
  "drop table if exists buildids8_r_de;\n"
  "drop table if exists buildids8_r_sref;\n"
  "drop table if exists buildids8_r_sdef;\n"
  "drop table if exists buildids8_file_mtime_scanned;\n"
  "drop table if exists buildids8_files;\n"
  "drop table if exists buildids8_buildids;\n"
// buildids7: separate _norm table into dense subtype tables
  // Same view cleanup as for buildids8 (same assumption about names).
  "drop view if exists buildids7_query_d;\n"
  "drop view if exists buildids7_query_e;\n"
  "drop view if exists buildids7_query_s;\n"
  "drop view if exists buildids7_stats;\n"
  "drop table if exists buildids7_f_de;\n"
  "drop table if exists buildids7_f_s;\n"
  "drop table if exists buildids7_r_de;\n"
  "drop table if exists buildids7_r_sref;\n"
  "drop table if exists buildids7_r_sdef;\n"
  "drop table if exists buildids7_file_mtime_scanned;\n"
  "drop table if exists buildids7_files;\n"
  "drop table if exists buildids7_buildids;\n"
// buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
  "drop table if exists buildids6_norm;\n"
  "drop table if exists buildids6_files;\n"
  "drop table if exists buildids6_buildids;\n"
  "drop view if exists buildids6;\n"
// buildids5: redefine srcfile1 column to be '.'-less (for rpms)
  "drop table if exists buildids5_norm;\n"
  "drop table if exists buildids5_files;\n"
  "drop table if exists buildids5_buildids;\n"
  "drop table if exists buildids5_bolo;\n"
  "drop table if exists buildids5_rfolo;\n"
  "drop view if exists buildids5;\n"
// buildids4: introduce rpmfile RFOLO
  "drop table if exists buildids4_norm;\n"
  "drop table if exists buildids4_files;\n"
  "drop table if exists buildids4_buildids;\n"
  "drop table if exists buildids4_bolo;\n"
  "drop table if exists buildids4_rfolo;\n"
  "drop view if exists buildids4;\n"
// buildids3*: split out srcfile BOLO
  "drop table if exists buildids3_norm;\n"
  "drop table if exists buildids3_files;\n"
  "drop table if exists buildids3_buildids;\n"
  "drop table if exists buildids3_bolo;\n"
  "drop view if exists buildids3;\n"
// buildids2: normalized buildid and filenames into interning tables;
  "drop table if exists buildids2_norm;\n"
  "drop table if exists buildids2_files;\n"
  "drop table if exists buildids2_buildids;\n"
  "drop view if exists buildids2;\n"
// buildids1: made buildid and artifacttype NULLable, to represent cached-negative
//           lookups from sources, e.g. files or rpms that contain no buildid-indexable content
  "drop table if exists buildids1;\n"
// buildids: original
  "drop table if exists buildids;\n"
  ;

// Statement text for cleanup: truncates the write-ahead log.
static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
  "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
  ;
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| /* Name and version of program.  */
 | |
| /* ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; */ // not this simple for C++
 | |
| 
 | |
| /* Bug report address.  Both macro names used here are defined outside
     this part of the file; presumably this defines the address argp prints
     for bug reports, set to the configured package value -- TODO confirm.  */
 | |
| ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
 | |
| 
 | |
/* Definitions of arguments for argp functions.

   Entries with a NULL name and a doc string are group headers; the
   all-NULL entry terminates the table.  Long-only options use the
   ARGP_KEY_* values defined next to them as their key.

   Help strings are written as single or adjacent literals: a
   backslash-newline inside a literal would pull the next line's
   indentation into the help text.  */
static const struct argp_option options[] =
  {
   { NULL, 0, NULL, 0, "Scanners:", 1 },
   { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
   { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
   { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
   { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
   // "source-oci-imageregistry"  ...

   { NULL, 0, NULL, 0, "Options:", 2 },
   { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
   { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
   { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
   { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
   { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM.", 0 },
   { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
   { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
   { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
   { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
   { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
   { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
   { "regex-groom", 'r', NULL, 0, "Uses regexes from -I and -X arguments to groom the database.", 0 },
#define ARGP_KEY_FDCACHE_FDS 0x1001
   { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
#define ARGP_KEY_FDCACHE_MBS 0x1002
   { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
   { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
#define ARGP_KEY_FDCACHE_MINTMP 0x1004
   { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
#define ARGP_KEY_FDCACHE_PREFETCH_MBS 0x1005
   { "fdcache-prefetch-mbs", ARGP_KEY_FDCACHE_PREFETCH_MBS, "MB", 0,
     "Megabytes allocated to the "
     "prefetch cache.", 0 },
#define ARGP_KEY_FDCACHE_PREFETCH_FDS 0x1006
   { "fdcache-prefetch-fds", ARGP_KEY_FDCACHE_PREFETCH_FDS, "NUM", 0,
     "Number of files allocated to the "
     "prefetch cache.", 0 },
#define ARGP_KEY_FORWARDED_TTL_LIMIT 0x1007
   { "forwarded-ttl-limit", ARGP_KEY_FORWARDED_TTL_LIMIT, "NUM", 0, "Limit of X-Forwarded-For hops, default 8.", 0 },
#define ARGP_KEY_PASSIVE 0x1008
   { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
   { NULL, 0, NULL, 0, NULL, 0 },
  };
 | |
| 
 | |
| /* Short description of program.  */
 | |
| static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
 | |
| 
 | |
| /* Strings for arguments in help texts.  */
 | |
| static const char args_doc[] = "[PATH ...]";
 | |
| 
 | |
| /* Prototype for option handler.  */
 | |
| static error_t parse_opt (int key, char *arg, struct argp_state *state);
 | |
| 
 | |
| /* Data structure to communicate with argp functions.  */
 | |
| static struct argp argp =
 | |
|   {
 | |
|    options, parse_opt, args_doc, doc, NULL, NULL, NULL
 | |
|   };
 | |
| 
 | |
| 
 | |
| static string db_path;
 | |
| static sqlite3 *db;  // single connection, serialized across all our threads!
 | |
| static sqlite3 *dbq; // webapi query-servicing readonly connection, serialized ditto!
 | |
| static unsigned verbose;
 | |
| static volatile sig_atomic_t interrupted = 0;
 | |
| static volatile sig_atomic_t forced_rescan_count = 0;
 | |
| static volatile sig_atomic_t sigusr1 = 0;
 | |
| static volatile sig_atomic_t forced_groom_count = 0;
 | |
| static volatile sig_atomic_t sigusr2 = 0;
 | |
| static unsigned http_port = 8002;
 | |
| static unsigned rescan_s = 300;
 | |
| static unsigned groom_s = 86400;
 | |
| static bool maxigroom = false;
 | |
| static unsigned concurrency = std::thread::hardware_concurrency() ?: 1;
 | |
| static set<string> source_paths;
 | |
| static bool scan_files = false;
 | |
| static map<string,string> scan_archives;
 | |
| static vector<string> extra_ddl;
 | |
| static regex_t file_include_regex;
 | |
| static regex_t file_exclude_regex;
 | |
| static bool regex_groom = false;
 | |
| static bool traverse_logical;
 | |
| static long fdcache_fds;
 | |
| static long fdcache_mbs;
 | |
| static long fdcache_prefetch;
 | |
| static long fdcache_mintmp;
 | |
| static long fdcache_prefetch_mbs;
 | |
| static long fdcache_prefetch_fds;
 | |
| static unsigned forwarded_ttl_limit = 8;
 | |
| static string tmpdir;
 | |
| static bool passive_p = false;
 | |
| 
 | |
// Forward declarations of the metric helpers; the definitions are not
// in this part of the file.  Overloads differ in how many name/value
// label pairs accompany the metric name.

// no label
static void set_metric(const string& key, double value);
// static void inc_metric(const string& key);

// one label (lname=lvalue)
static void set_metric(const string& metric, const string& lname, const string& lvalue, double value);
static void inc_metric(const string& metric, const string& lname, const string& lvalue);
static void add_metric(const string& metric, const string& lname, const string& lvalue, double value);

// two labels (lname=lvalue, rname=rvalue)
static void inc_metric(const string& metric, const string& lname, const string& lvalue,
                       const string& rname, const string& rvalue);
static void add_metric(const string& metric, const string& lname, const string& lvalue,
                       const string& rname, const string& rvalue, double value);
 | |
| 
 | |
| 
 | |
| class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
 | |
|   string m, n, v;
 | |
| public:
 | |
|   tmp_inc_metric(const string& mname, const string& lname, const string& lvalue):
 | |
|     m(mname), n(lname), v(lvalue)
 | |
|   {
 | |
|     add_metric (m, n, v, 1);
 | |
|   }
 | |
|   ~tmp_inc_metric()
 | |
|   {
 | |
|     add_metric (m, n, v, -1);
 | |
|   }
 | |
| };
 | |
| 
 | |
| class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
 | |
|   string m, n, v;
 | |
|   struct timespec ts_start;
 | |
| public:
 | |
|   tmp_ms_metric(const string& mname, const string& lname, const string& lvalue):
 | |
|     m(mname), n(lname), v(lvalue)
 | |
|   {
 | |
|     clock_gettime (CLOCK_MONOTONIC, & ts_start);
 | |
|   }
 | |
|   ~tmp_ms_metric()
 | |
|   {
 | |
|     struct timespec ts_end;
 | |
|     clock_gettime (CLOCK_MONOTONIC, & ts_end);
 | |
|     double deltas = (ts_end.tv_sec - ts_start.tv_sec)
 | |
|       + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
 | |
| 
 | |
|     add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
 | |
|     inc_metric (m + "_milliseconds_count", n, v);
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| /* Handle program arguments.  */
 | |
| static error_t
 | |
| parse_opt (int key, char *arg,
 | |
| 	   struct argp_state *state __attribute__ ((unused)))
 | |
| {
 | |
|   int rc;
 | |
|   switch (key)
 | |
|     {
 | |
|     case 'v': verbose ++; break;
 | |
|     case 'd':
 | |
|       /* When using the in-memory database make sure it is shareable,
 | |
| 	 so we can open it twice as read/write and read-only.  */
 | |
|       if (strcmp (arg, ":memory:") == 0)
 | |
| 	db_path = "file::memory:?cache=shared";
 | |
|       else
 | |
| 	db_path = string(arg);
 | |
|       break;
 | |
|     case 'p': http_port = (unsigned) atoi(arg);
 | |
|       if (http_port == 0 || http_port > 65535)
 | |
|         argp_failure(state, 1, EINVAL, "port number");
 | |
|       break;
 | |
|     case 'F': scan_files = true; break;
 | |
|     case 'R':
 | |
|       scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
 | |
|       break;
 | |
|     case 'U':
 | |
|       scan_archives[".deb"]="(bsdtar -O -x -f - data.tar\\*)<";
 | |
|       scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar\\*)<";
 | |
|       scan_archives[".ipk"]="(bsdtar -O -x -f - data.tar\\*)<";
 | |
|       // .udeb too?
 | |
|       break;
 | |
|     case 'Z':
 | |
|       {
 | |
|         char* extension = strchr(arg, '=');
 | |
|         if (arg[0] == '\0')
 | |
|           argp_failure(state, 1, EINVAL, "missing EXT");
 | |
|         else if (extension)
 | |
|           scan_archives[string(arg, (extension-arg))]=string(extension+1);
 | |
|         else
 | |
|           scan_archives[string(arg)]=string("cat");
 | |
|       }
 | |
|       break;
 | |
|     case 'L':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-L option inconsistent with passive mode");
 | |
|       traverse_logical = true;
 | |
|       break;
 | |
|     case 'D':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-D option inconsistent with passive mode");
 | |
|       extra_ddl.push_back(string(arg));
 | |
|       break;
 | |
|     case 't':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-t option inconsistent with passive mode");
 | |
|       rescan_s = (unsigned) atoi(arg);
 | |
|       break;
 | |
|     case 'g':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-g option inconsistent with passive mode");
 | |
|       groom_s = (unsigned) atoi(arg);
 | |
|       break;
 | |
|     case 'G':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-G option inconsistent with passive mode");
 | |
|       maxigroom = true;
 | |
|       break;
 | |
|     case 'c':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-c option inconsistent with passive mode");
 | |
|       concurrency = (unsigned) atoi(arg);
 | |
|       if (concurrency < 1) concurrency = 1;
 | |
|       break;
 | |
|     case 'I':
 | |
|       // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-I option inconsistent with passive mode");
 | |
|       regfree (&file_include_regex);
 | |
|       rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
 | |
|       if (rc != 0)
 | |
|         argp_failure(state, 1, EINVAL, "regular expression");
 | |
|       break;
 | |
|     case 'X':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-X option inconsistent with passive mode");
 | |
|       regfree (&file_exclude_regex);
 | |
|       rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
 | |
|       if (rc != 0)
 | |
|         argp_failure(state, 1, EINVAL, "regular expression");
 | |
|       break;
 | |
|     case 'r':
 | |
|       if (passive_p)
 | |
|         argp_failure(state, 1, EINVAL, "-r option inconsistent with passive mode");
 | |
|       regex_groom = true;
 | |
|       break;
 | |
|     case ARGP_KEY_FDCACHE_FDS:
 | |
|       fdcache_fds = atol (arg);
 | |
|       break;
 | |
|     case ARGP_KEY_FDCACHE_MBS:
 | |
|       fdcache_mbs = atol (arg);
 | |
|       break;
 | |
|     case ARGP_KEY_FDCACHE_PREFETCH:
 | |
|       fdcache_prefetch = atol (arg);
 | |
|       break;
 | |
|     case ARGP_KEY_FDCACHE_MINTMP:
 | |
|       fdcache_mintmp = atol (arg);
 | |
|       if( fdcache_mintmp > 100 || fdcache_mintmp < 0 )
 | |
|         argp_failure(state, 1, EINVAL, "fdcache mintmp percent");
 | |
|       break;
 | |
|     case ARGP_KEY_FORWARDED_TTL_LIMIT:
 | |
|       forwarded_ttl_limit = (unsigned) atoi(arg);
 | |
|       break;
 | |
|     case ARGP_KEY_ARG:
 | |
|       source_paths.insert(string(arg));
 | |
|       break;
 | |
|     case ARGP_KEY_FDCACHE_PREFETCH_FDS:
 | |
|       fdcache_prefetch_fds = atol(arg);
 | |
|       if ( fdcache_prefetch_fds < 0)
 | |
|         argp_failure(state, 1, EINVAL, "fdcache prefetch fds");
 | |
|       break;
 | |
|     case ARGP_KEY_FDCACHE_PREFETCH_MBS:
 | |
|       fdcache_prefetch_mbs = atol(arg);
 | |
|       if ( fdcache_prefetch_mbs < 0)
 | |
|         argp_failure(state, 1, EINVAL, "fdcache prefetch mbs");
 | |
|       break;
 | |
|     case ARGP_KEY_PASSIVE:
 | |
|       passive_p = true;
 | |
|       if (source_paths.size() > 0
 | |
|           || maxigroom
 | |
|           || extra_ddl.size() > 0
 | |
|           || traverse_logical)
 | |
|         // other conflicting options tricky to check
 | |
|         argp_failure(state, 1, EINVAL, "inconsistent options with passive mode");
 | |
|       break;
 | |
|       // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
 | |
|     default: return ARGP_ERR_UNKNOWN;
 | |
|     }
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
 | |
| 
 | |
| struct reportable_exception
 | |
| {
 | |
|   int code;
 | |
|   string message;
 | |
| 
 | |
|   reportable_exception(int c, const string& m): code(c), message(m) {}
 | |
|   reportable_exception(const string& m): code(503), message(m) {}
 | |
|   reportable_exception(): code(503), message() {}
 | |
| 
 | |
|   void report(ostream& o) const; // defined under obatched() class below
 | |
| 
 | |
|   MHD_RESULT mhd_send_response(MHD_Connection* c) const {
 | |
|     MHD_Response* r = MHD_create_response_from_buffer (message.size(),
 | |
|                                                        (void*) message.c_str(),
 | |
|                                                        MHD_RESPMEM_MUST_COPY);
 | |
|     MHD_add_response_header (r, "Content-Type", "text/plain");
 | |
|     MHD_RESULT rc = MHD_queue_response (c, code, r);
 | |
|     MHD_destroy_response (r);
 | |
|     return rc;
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| struct sqlite_exception: public reportable_exception
 | |
| {
 | |
|   sqlite_exception(int rc, const string& msg):
 | |
|     reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {
 | |
|     inc_metric("error_count","sqlite3",sqlite3_errstr(rc));
 | |
|   }
 | |
| };
 | |
| 
 | |
| struct libc_exception: public reportable_exception
 | |
| {
 | |
|   libc_exception(int rc, const string& msg):
 | |
|     reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
 | |
|     inc_metric("error_count","libc",strerror(rc));
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| struct archive_exception: public reportable_exception
 | |
| {
 | |
|   archive_exception(const string& msg):
 | |
|     reportable_exception(string("libarchive error: ") + msg) {
 | |
|       inc_metric("error_count","libarchive",msg);
 | |
|   }
 | |
|   archive_exception(struct archive* a, const string& msg):
 | |
|     reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
 | |
|     inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| struct elfutils_exception: public reportable_exception
 | |
| {
 | |
|   elfutils_exception(int rc, const string& msg):
 | |
|     reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
 | |
|     inc_metric("error_count","elfutils",elf_errmsg(rc));
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
// A de-duplicating work queue shared between "fronter" threads, which
// take items with wait_front()/done_front(), and "idler" threads, which
// use wait_idle()/done_idle() to run only while the queue is empty and
// no fronter is busy.  All state is guarded by mtx; every state change
// that might unblock somebody is broadcast on cv.
template <typename Payload>
class workq
{
  set<Payload> q; // eliminate duplicates
  mutex mtx;
  condition_variable cv;
  bool dead;
  unsigned idlers;   // number of threads busy with wait_idle / done_idle
  unsigned fronters; // number of threads busy with wait_front / done_front

  // Export the current queue length.  Caller holds mtx.
  void publish_pending()
  {
    set_metric("thread_work_pending","role","scan", q.size());
  }

public:
  workq(): dead(false), idlers(0), fronters(0) {}
  ~workq() {}

  // add an item (duplicates collapse) and wake everybody up
  void push_back(const Payload& p)
  {
    lock_guard<mutex> guard(mtx);
    q.insert (p);
    publish_pending();
    cv.notify_all();
  }

  // kill this workqueue, wake up all idlers / scanners
  void nuke() {
    lock_guard<mutex> guard(mtx);
    // optional: q.clear();
    dead = true;
    cv.notify_all();
  }

  // clear the workqueue, when scanning is interrupted with USR2
  void clear() {
    lock_guard<mutex> guard(mtx);
    q.clear();
    publish_pending();
    // NB: there may still be some live fronters
    cv.notify_all(); // maybe wake up waiting idlers
  }

  // block this scanner thread until there is work to do and no active idler
  // Returns false if the queue was nuked, else true with the item in p.
  bool wait_front (Payload& p)
  {
    unique_lock<mutex> lock(mtx);
    cv.wait(lock, [this] { return dead || (!q.empty() && idlers == 0); });
    if (dead)
      return false;

    auto first = q.begin();
    p = *first;
    q.erase (first);
    ++fronters; // prevent idlers from starting awhile, even if empty q
    publish_pending();
    // NB: don't wake up idlers yet!  The consumer is busy
    // processing this element until it calls done_front().
    return true;
  }

  // notify waitq that scanner thread is done with that last item
  void done_front ()
  {
    lock_guard<mutex> guard(mtx);
    --fronters;
    if (q.empty() && fronters == 0)
      cv.notify_all(); // maybe wake up waiting idlers
  }

  // block this idler thread until there is no work to do
  void wait_idle ()
  {
    unique_lock<mutex> lock(mtx);
    cv.notify_all(); // maybe wake up waiting scanners
    cv.wait(lock, [this] { return dead || (q.empty() && fronters == 0); });
    ++idlers;
  }

  // this idler thread is finished; let others proceed
  void done_idle ()
  {
    lock_guard<mutex> guard(mtx);
    --idlers;
    cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
  }
};
 | |
| 
 | |
using stat_t = struct stat;
using scan_payload = pair<string,stat_t>;
// Order scan payloads by their path name only; the stat half of the
// pair takes no part in the comparison.
inline bool operator< (const scan_payload& a, const scan_payload& b)
{
  return a.first.compare(b.first) < 0;
}
 | |
| static workq<scan_payload> scanq; // just a single one
 | |
| // producer & idler: thread_main_fts_source_paths()
 | |
| // consumer: thread_main_scanner()
 | |
| // idler: thread_main_groom()
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
// Unique set is a thread-safe structure that lends 'ownership' of a value
// to a thread.  Other threads requesting the same thing are made to wait.
// It's like a semaphore-on-demand.
template <typename T>
class unique_set
{
private:
  set<T> values; // the values currently held by somebody
  mutex mtx;
  condition_variable cv;
public:
  unique_set() {}
  ~unique_set() {}

  // Block until nobody holds value, then record it as held.
  void acquire(const T& value)
  {
    unique_lock<mutex> lock(mtx);
    cv.wait(lock, [&] { return values.count(value) == 0; });
    values.insert(value);
  }

  // Drop value from the held set and wake all waiters so they re-check.
  void release(const T& value)
  {
    lock_guard<mutex> guard(mtx);
    // assert (values.find(value) != values.end());
    values.erase(value);
    cv.notify_all();
  }
};
 | |
| 
 | |
| 
 | |
| // This is the object that's instantiate to uniquely hold a value in a
 | |
| // RAII-pattern way.
 | |
| template <typename T>
 | |
| class unique_set_reserver
 | |
| {
 | |
| private:
 | |
|   unique_set<T>& please_hold;
 | |
|   T mine;
 | |
| public:
 | |
|   unique_set_reserver(unique_set<T>& t, const T& value):
 | |
|     please_hold(t), mine(value)  { please_hold.acquire(mine); }
 | |
|   ~unique_set_reserver() { please_hold.release(mine); }
 | |
| };
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| // Print a standard timestamp.
 | |
| static ostream&
 | |
| timestamp (ostream &o)
 | |
| {
 | |
|   char datebuf[80];
 | |
|   char *now2 = NULL;
 | |
|   time_t now_t = time(NULL);
 | |
|   struct tm *now = gmtime (&now_t);
 | |
|   if (now)
 | |
|     {
 | |
|       (void) strftime (datebuf, sizeof (datebuf), "%c", now);
 | |
|       now2 = datebuf;
 | |
|     }
 | |
| 
 | |
|   return o << "[" << (now2 ? now2 : "") << "] "
 | |
|            << "(" << getpid () << "/" << tid() << "): ";
 | |
| }
 | |
| 
 | |
| 
 | |
| // A little class that impersonates an ostream to the extent that it can
 | |
| // take << streaming operations.  It batches up the bits into an internal
 | |
| // stringstream until it is destroyed; then flushes to the original ostream.
 | |
| // It adds a timestamp
 | |
| class obatched
 | |
| {
 | |
| private:
 | |
|   ostream& o;
 | |
|   stringstream stro;
 | |
|   static mutex lock;
 | |
| public:
 | |
|   obatched(ostream& oo, bool timestamp_p = true): o(oo)
 | |
|   {
 | |
|     if (timestamp_p)
 | |
|       timestamp(stro);
 | |
|   }
 | |
|   ~obatched()
 | |
|   {
 | |
|     unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
 | |
|     o << stro.str();
 | |
|     o.flush();
 | |
|   }
 | |
|   operator ostream& () { return stro; }
 | |
|   template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
 | |
| };
 | |
| mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
 | |
| 
 | |
| 
 | |
| void reportable_exception::report(ostream& o) const {
 | |
|   obatched(o) << message << endl;
 | |
| }
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| // RAII style sqlite prepared-statement holder that matches { } block lifetime
 | |
| 
 | |
| struct sqlite_ps
 | |
| {
 | |
| private:
 | |
|   sqlite3* db;
 | |
|   const string nickname;
 | |
|   const string sql;
 | |
|   sqlite3_stmt *pp;
 | |
| 
 | |
|   sqlite_ps(const sqlite_ps&); // make uncopyable
 | |
|   sqlite_ps& operator=(const sqlite_ps &); // make unassignable
 | |
| 
 | |
| public:
 | |
|   sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
 | |
|     // tmp_ms_metric tick("sqlite3","prep",nickname);
 | |
|     if (verbose > 4)
 | |
|       obatched(clog) << nickname << " prep " << sql << endl;
 | |
|     int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
 | |
|     if (rc != SQLITE_OK)
 | |
|       throw sqlite_exception(rc, "prepare " + sql);
 | |
|   }
 | |
| 
 | |
|   sqlite_ps& reset()
 | |
|   {
 | |
|     tmp_ms_metric tick("sqlite3","reset",nickname);
 | |
|     sqlite3_reset(this->pp);
 | |
|     return *this;
 | |
|   }
 | |
| 
 | |
|   sqlite_ps& bind(int parameter, const string& str)
 | |
|   {
 | |
|     if (verbose > 4)
 | |
|       obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
 | |
|     int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
 | |
|     if (rc != SQLITE_OK)
 | |
|       throw sqlite_exception(rc, "sqlite3 bind");
 | |
|     return *this;
 | |
|   }
 | |
| 
 | |
|   sqlite_ps& bind(int parameter, int64_t value)
 | |
|   {
 | |
|     if (verbose > 4)
 | |
|       obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
 | |
|     int rc = sqlite3_bind_int64 (this->pp, parameter, value);
 | |
|     if (rc != SQLITE_OK)
 | |
|       throw sqlite_exception(rc, "sqlite3 bind");
 | |
|     return *this;
 | |
|   }
 | |
| 
 | |
|   sqlite_ps& bind(int parameter)
 | |
|   {
 | |
|     if (verbose > 4)
 | |
|       obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
 | |
|     int rc = sqlite3_bind_null (this->pp, parameter);
 | |
|     if (rc != SQLITE_OK)
 | |
|       throw sqlite_exception(rc, "sqlite3 bind");
 | |
|     return *this;
 | |
|   }
 | |
| 
 | |
| 
 | |
|   void step_ok_done() {
 | |
|     tmp_ms_metric tick("sqlite3","step_done",nickname);
 | |
|     int rc = sqlite3_step (this->pp);
 | |
|     if (verbose > 4)
 | |
|       obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
 | |
|     if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
 | |
|       throw sqlite_exception(rc, "sqlite3 step");
 | |
|     (void) sqlite3_reset (this->pp);
 | |
|   }
 | |
| 
 | |
| 
 | |
|   int step() {
 | |
|     tmp_ms_metric tick("sqlite3","step",nickname);
 | |
|     int rc = sqlite3_step (this->pp);
 | |
|     if (verbose > 4)
 | |
|       obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
 | |
|     return rc;
 | |
|   }
 | |
| 
 | |
|   ~sqlite_ps () { sqlite3_finalize (this->pp); }
 | |
|   operator sqlite3_stmt* () { return this->pp; }
 | |
| };
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
// RAII style templated autocloser
//
// Holds a payload and a one-argument cleanup function; the destructor
// calls the function on the payload and discards whatever it returns.

template <class Payload, class Ignore>
struct defer_dtor
{
  typedef Ignore (*dtor_fn) (Payload);

  defer_dtor(Payload _p, dtor_fn _fn): payload(_p), cleanup(_fn) {}
  ~defer_dtor() { (void) cleanup(payload); }

private:
  Payload payload;
  dtor_fn cleanup;

  defer_dtor(const defer_dtor<Payload,Ignore>&); // make uncopyable
  defer_dtor& operator=(const defer_dtor<Payload,Ignore> &); // make unassignable
};
 | |
| 
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
// Return a copy of str with every character dropped except
// alphanumerics and the punctuation  / . , _ :
// Used below on client-supplied header values before they are logged.
static string
header_censor(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // <ctype.h> functions are only defined for unsigned char values
      // (and EOF); a plain char may be negative for non-ASCII bytes.
      if (isalnum((unsigned char) x) || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
        y += x;
    }
  return y;
}
 | |
| 
 | |
| 
 | |
| static string
 | |
| conninfo (struct MHD_Connection * conn)
 | |
| {
 | |
|   char hostname[256]; // RFC1035
 | |
|   char servname[256];
 | |
|   int sts = -1;
 | |
| 
 | |
|   if (conn == 0)
 | |
|     return "internal";
 | |
| 
 | |
|   /* Look up client address data. */
 | |
|   const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
 | |
|                                                                MHD_CONNECTION_INFO_CLIENT_ADDRESS);
 | |
|   struct sockaddr *so = u ? u->client_addr : 0;
 | |
| 
 | |
|   if (so && so->sa_family == AF_INET) {
 | |
|     sts = getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), servname,
 | |
|                        sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
 | |
|   } else if (so && so->sa_family == AF_INET6) {
 | |
|     sts = getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname),
 | |
|                        servname, sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
 | |
|   }
 | |
|   if (sts != 0) {
 | |
|     hostname[0] = servname[0] = '\0';
 | |
|   }
 | |
| 
 | |
|   // extract headers relevant to administration
 | |
|   const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
 | |
|   const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
 | |
|   // NB: these are untrustworthy, beware if machine-processing log files
 | |
| 
 | |
|   return string(hostname) + string(":") + string(servname) +
 | |
|     string(" UA:") + header_censor(string(user_agent)) +
 | |
|     string(" XFF:") + header_censor(string(x_forwarded_for));
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| static void
 | |
| add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
 | |
| {
 | |
|   struct tm *now = gmtime (&mtime);
 | |
|   if (now != NULL)
 | |
|     {
 | |
|       char datebuf[80];
 | |
|       size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT", now);
 | |
|       if (rc > 0 && rc < sizeof (datebuf))
 | |
|         (void) MHD_add_response_header (resp, "Last-Modified", datebuf);
 | |
|     }
 | |
| 
 | |
|   (void) MHD_add_response_header (resp, "Cache-Control", "public");
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static struct MHD_Response*
 | |
| handle_buildid_f_match (bool internal_req_t,
 | |
|                         int64_t b_mtime,
 | |
|                         const string& b_source0,
 | |
|                         int *result_fd)
 | |
| {
 | |
|   (void) internal_req_t; // ignored
 | |
|   int fd = open(b_source0.c_str(), O_RDONLY);
 | |
|   if (fd < 0)
 | |
|     throw libc_exception (errno, string("open ") + b_source0);
 | |
| 
 | |
|   // NB: use manual close(2) in error case instead of defer_dtor, because
 | |
|   // in the normal case, we want to hand the fd over to libmicrohttpd for
 | |
|   // file transfer.
 | |
| 
 | |
|   struct stat s;
 | |
|   int rc = fstat(fd, &s);
 | |
|   if (rc < 0)
 | |
|     {
 | |
|       close(fd);
 | |
|       throw libc_exception (errno, string("fstat ") + b_source0);
 | |
|     }
 | |
| 
 | |
|   if ((int64_t) s.st_mtime != b_mtime)
 | |
|     {
 | |
|       if (verbose)
 | |
|         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
 | |
|       close(fd);
 | |
|       return 0;
 | |
|     }
 | |
| 
 | |
|   inc_metric ("http_responses_total","result","file");
 | |
|   struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
 | |
|   if (r == 0)
 | |
|     {
 | |
|       if (verbose)
 | |
|         obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
 | |
|       close(fd);
 | |
|     }
 | |
|   else
 | |
|     {
 | |
|       MHD_add_response_header (r, "Content-Type", "application/octet-stream");
 | |
|       std::string file = b_source0.substr(b_source0.find_last_of("/")+1, b_source0.length());
 | |
|       MHD_add_response_header (r, "X-DEBUGINFOD-SIZE", to_string(s.st_size).c_str() );
 | |
|       MHD_add_response_header (r, "X-DEBUGINFOD-FILE", file.c_str() );
 | |
|       add_mhd_last_modified (r, s.st_mtime);
 | |
|       if (verbose > 1)
 | |
|         obatched(clog) << "serving file " << b_source0 << endl;
 | |
|       /* libmicrohttpd will close it. */
 | |
|       if (result_fd)
 | |
|         *result_fd = fd;
 | |
|     }
 | |
| 
 | |
|   return r;
 | |
| }
 | |
| 
 | |
| 
 | |
// quote all questionable characters of str for safe passage through a sh -c expansion.
// Every character other than an alphanumeric or '/' gets a backslash
// put in front of it.
static string
shell_escape(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // <ctype.h> functions are only defined for unsigned char values
      // (and EOF); a plain char may be negative for non-ASCII bytes.
      if (! isalnum((unsigned char) x) && x != '/')
        y += "\\";
      y += x;
    }
  return y;
}
 | |
| 
 | |
| 
 | |
// PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
//
// Namely:
//    //         ->   /
//    /foo/../   ->   /
//    /./        ->   /
//
// This mapping is done on dwarf-side source path names, which may
// include these constructs, so we can deal with debuginfod clients
// that accidentally canonicalize the paths.
//
// realpath(3) is close but not quite right, because it also resolves
// symbolic links.  Symlinks at the debuginfod server have nothing to
// do with the build-time symlinks, thus they must not be considered.
//
// The loop below repeatedly inspects the front of the remaining input
// and either discards a dot-segment / duplicate slash, or moves one
// path segment over to the output.
//
// see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
// see also libc __realpath()
// see also llvm llvm::sys::path::remove_dots()
static string
canon_pathname (const string& input)
{
  string rest = input; // 5.2.4 (1): input buffer, consumed from the front
  string out;          // 5.2.4 (1): output buffer

  while (! rest.empty())
    {
      // 5.2.4 (2) A: leading "../" or "./" is dropped
      if (rest.compare(0, 3, "../") == 0)
        {
          rest.erase(0, 3);
          continue;
        }
      if (rest.compare(0, 2, "./") == 0)
        {
          rest.erase(0, 2);
          continue;
        }

      // 5.2.4 (2) B: "/./" collapses to "/"
      if (rest.compare(0, 3, "/./") == 0)
        {
          rest.erase(0, 2);
          continue;
        }
      if (rest == "/.")
        {
          // no need to handle "/." complete-path-segment case; we're dealing with file names
          rest.clear();
          continue;
        }

      // 5.2.4 (2) C: "/../" collapses to "/" and pops the last output segment
      if (rest.compare(0, 4, "/../") == 0)
        {
          rest.erase(0, 3);
          string::size_type last_slash = out.rfind('/');
          if (last_slash == string::npos)
            out.clear();
          else
            out.erase(last_slash);
          continue;
        }
      if (rest == "/..")
        {
          // no need to handle "/.." complete-path-segment case; we're dealing with file names
          rest.clear();
          continue;
        }

      // 5.2.4 (2) D
      // no need to handle these cases; we're dealing with file names
      if (rest == "." || rest == "..")
        {
          rest.clear();
          continue;
        }

      // POSIX special: map // to /
      if (rest.compare(0, 2, "//") == 0)
        {
          rest.erase(0, 1);
          continue;
        }

      // 5.2.4 (2) E: move the first segment (with its leading slash,
      // if any) up to but excluding the next slash to the output
      const string::size_type search_from = (rest[0] == '/') ? 1 : 0; // skip first slash
      const string::size_type next_slash = rest.find('/', search_from);
      out.append(rest, 0, next_slash);
      rest.erase(0, next_slash); // npos here empties the input
    }

  return out;
}
 | |
| 
 | |
| 
 | |
| // Estimate available free space for a given filesystem via statfs(2).
 | |
| // Return true if the free fraction is known to be smaller than the
 | |
| // given minimum percentage.  Also update a related metric.
 | |
| bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
 | |
| {
 | |
|   struct statfs sfs;
 | |
|   int rc = statfs(path.c_str(), &sfs);
 | |
|   if (rc == 0)
 | |
|     {
 | |
|       double s = (double) sfs.f_bavail / (double) sfs.f_blocks;
 | |
|       set_metric("filesys_free_ratio","purpose",label, s);
 | |
|       return ((s * 100.0) < minfree);
 | |
|     }
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // A map-like class that owns a cache of file descriptors (indexed by
 | |
| // file / content names).
 | |
| //
 | |
| // If only it could use fd's instead of file names ... but we can't
 | |
| // dup(2) to create independent descriptors for the same unlinked
 | |
| // files, so would have to use some goofy linux /proc/self/fd/%d
 | |
| // hack such as the following
 | |
| 
 | |
#if 0
// Disabled sketch: open a second, independent descriptor for the file
// behind fd by going through /proc/self/fd.  Gives -1 on failure or
// when not built for linux.
int superdup(int fd)
{
#ifdef __linux__
  char *fdpath = NULL;
  int newfd = -1;
  if (asprintf(& fdpath, "/proc/self/fd/%d", fd) >= 0)
    newfd = open(fdpath, O_RDONLY);
  free (fdpath);
  return newfd;
#else
  return -1;
#endif
}
#endif
 | |
| 
 | |
| class libarchive_fdcache
 | |
| {
 | |
| private:
 | |
|   mutex fdcache_lock;
 | |
| 
 | |
|   struct fdcache_entry
 | |
|   {
 | |
|     string archive;
 | |
|     string entry;
 | |
|     string fd;
 | |
|     double fd_size_mb; // slightly rounded up megabytes
 | |
|   };
 | |
|   deque<fdcache_entry> lru; // @head: most recently used
 | |
|   long max_fds;
 | |
|   deque<fdcache_entry> prefetch; // prefetched
 | |
|   long max_mbs;
 | |
|   long max_prefetch_mbs;
 | |
|   long max_prefetch_fds;
 | |
| 
 | |
| public:
 | |
|   void set_metrics()
 | |
|   {
 | |
|     double fdcache_mb = 0.0;
 | |
|     double prefetch_mb = 0.0;
 | |
|     for (auto i = lru.begin(); i < lru.end(); i++)
 | |
|       fdcache_mb += i->fd_size_mb;
 | |
|     for (auto j = prefetch.begin(); j < prefetch.end(); j++)
 | |
|       prefetch_mb += j->fd_size_mb;
 | |
|     set_metric("fdcache_bytes", fdcache_mb*1024.0*1024.0);
 | |
|     set_metric("fdcache_count", lru.size());
 | |
|     set_metric("fdcache_prefetch_bytes", prefetch_mb*1024.0*1024.0);
 | |
|     set_metric("fdcache_prefetch_count", prefetch.size());
 | |
|   }
 | |
| 
 | |
|   void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
 | |
|   {
 | |
|     {
 | |
|       unique_lock<mutex> lock(fdcache_lock);
 | |
|       // nuke preexisting copy
 | |
|       for (auto i = lru.begin(); i < lru.end(); i++)
 | |
|         {
 | |
|           if (i->archive == a && i->entry == b)
 | |
|             {
 | |
|               unlink (i->fd.c_str());
 | |
|               lru.erase(i);
 | |
|               inc_metric("fdcache_op_count","op","dequeue");
 | |
|               break; // must not continue iterating
 | |
|             }
 | |
|         }
 | |
|       // nuke preexisting copy in prefetch
 | |
|       for (auto i = prefetch.begin(); i < prefetch.end(); i++)
 | |
|         {
 | |
|           if (i->archive == a && i->entry == b)
 | |
|             {
 | |
|               unlink (i->fd.c_str());
 | |
|               prefetch.erase(i);
 | |
|               inc_metric("fdcache_op_count","op","prefetch_dequeue");
 | |
|               break; // must not continue iterating
 | |
|             }
 | |
|         }
 | |
|       double mb = (sz+65535)/1048576.0; // round up to 64K block
 | |
|       fdcache_entry n = { a, b, fd, mb };
 | |
|       if (front_p)
 | |
|         {
 | |
|           inc_metric("fdcache_op_count","op","enqueue");
 | |
|           lru.push_front(n);
 | |
|         }
 | |
|       else
 | |
|         {
 | |
|           inc_metric("fdcache_op_count","op","prefetch_enqueue");
 | |
|           prefetch.push_front(n);
 | |
|         }
 | |
|       if (verbose > 3)
 | |
|         obatched(clog) << "fdcache interned a=" << a << " b=" << b
 | |
|                        << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
 | |
|     }
 | |
|     set_metrics();
 | |
| 
 | |
|     // NB: we age the cache at lookup time too
 | |
|     if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
 | |
|       {
 | |
|         inc_metric("fdcache_op_count","op","emerg-flush");
 | |
|         obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
 | |
|         this->limit(0, 0, 0, 0); // emergency flush
 | |
|       }
 | |
|     else if (front_p)
 | |
|       this->limit(max_fds, max_mbs, max_prefetch_fds, max_prefetch_mbs); // age cache if required
 | |
|   }
 | |
| 
 | |
|   int lookup(const string& a, const string& b)
 | |
|   {
 | |
|     int fd = -1;
 | |
|     {
 | |
|       unique_lock<mutex> lock(fdcache_lock);
 | |
|       for (auto i = lru.begin(); i < lru.end(); i++)
 | |
|         {
 | |
|           if (i->archive == a && i->entry == b)
 | |
|             { // found it; move it to head of lru
 | |
|               fdcache_entry n = *i;
 | |
|               lru.erase(i); // invalidates i, so no more iteration!
 | |
|               lru.push_front(n);
 | |
|               inc_metric("fdcache_op_count","op","requeue_front");
 | |
|               fd = open(n.fd.c_str(), O_RDONLY); 
 | |
|               break;
 | |
|             }
 | |
|         }
 | |
|       // Iterate through prefetch while fd == -1 to ensure that no duplication between lru and 
 | |
|       // prefetch occurs.
 | |
|       for ( auto i = prefetch.begin(); fd == -1 && i < prefetch.end(); ++i)
 | |
|         {
 | |
|           if (i->archive == a && i->entry == b)
 | |
|             { // found it; take the entry from the prefetch deque to the lru deque, since it has now been accessed.
 | |
|               fdcache_entry n = *i;
 | |
|               prefetch.erase(i);
 | |
|               lru.push_front(n);
 | |
|               inc_metric("fdcache_op_count","op","prefetch_access");
 | |
|               fd = open(n.fd.c_str(), O_RDONLY); 
 | |
|               break;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
 | |
|       {
 | |
|         inc_metric("fdcache_op_count","op","emerg-flush");
 | |
|         obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
 | |
|         this->limit(0, 0, 0, 0); // emergency flush
 | |
|       }
 | |
|     else if (fd >= 0)
 | |
|       this->limit(max_fds, max_mbs, max_prefetch_fds, max_prefetch_mbs); // age cache if required
 | |
| 
 | |
|     return fd;
 | |
|   }
 | |
| 
 | |
|   int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
 | |
|   {
 | |
|     unique_lock<mutex> lock(fdcache_lock);
 | |
|     for (auto i = lru.begin(); i < lru.end(); i++)
 | |
|       {
 | |
|         if (i->archive == a && i->entry == b)
 | |
|           {
 | |
|             inc_metric("fdcache_op_count","op","probe_hit");
 | |
|             return true;
 | |
|           }
 | |
|       }
 | |
|     for (auto i = prefetch.begin(); i < prefetch.end(); i++)
 | |
|       {
 | |
|         if (i->archive == a && i->entry == b)
 | |
|           {
 | |
|             inc_metric("fdcache_op_count","op","prefetch_probe_hit");
 | |
|             return true;
 | |
|           }
 | |
|       }
 | |
|     inc_metric("fdcache_op_count","op","probe_miss");
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   void clear(const string& a, const string& b)
 | |
|   {
 | |
|     unique_lock<mutex> lock(fdcache_lock);
 | |
|     for (auto i = lru.begin(); i < lru.end(); i++)
 | |
|       {
 | |
|         if (i->archive == a && i->entry == b)
 | |
|           { // found it; erase it from lru
 | |
|             fdcache_entry n = *i;
 | |
|             lru.erase(i); // invalidates i, so no more iteration!
 | |
|             inc_metric("fdcache_op_count","op","clear");
 | |
|             unlink (n.fd.c_str());
 | |
|             set_metrics();
 | |
|             return;
 | |
|           }
 | |
|       }
 | |
|     for (auto i = prefetch.begin(); i < prefetch.end(); i++)
 | |
|       {
 | |
|         if (i->archive == a && i->entry == b)
 | |
|           { // found it; erase it from lru
 | |
|             fdcache_entry n = *i;
 | |
|             prefetch.erase(i); // invalidates i, so no more iteration!
 | |
|             inc_metric("fdcache_op_count","op","prefetch_clear");
 | |
|             unlink (n.fd.c_str());
 | |
|             set_metrics();
 | |
|             return;
 | |
|           }
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   void limit(long maxfds, long maxmbs, long maxprefetchfds, long maxprefetchmbs , bool metrics_p = true)
 | |
|   {
 | |
|     if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
 | |
|       obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
 | |
| 
 | |
|     unique_lock<mutex> lock(fdcache_lock);
 | |
|     this->max_fds = maxfds;
 | |
|     this->max_mbs = maxmbs;
 | |
|     this->max_prefetch_fds = maxprefetchfds;
 | |
|     this->max_prefetch_mbs = maxprefetchmbs;
 | |
|     long total_fd = 0;
 | |
|     double total_mb = 0.0;
 | |
|     for (auto i = lru.begin(); i < lru.end(); i++)
 | |
|       {
 | |
|         // accumulate totals from most recently used one going backward
 | |
|         total_fd ++;
 | |
|         total_mb += i->fd_size_mb;
 | |
|         if (total_fd > this->max_fds || total_mb > this->max_mbs)
 | |
|           {
 | |
|             // found the cut here point!
 | |
| 
 | |
|             for (auto j = i; j < lru.end(); j++) // close all the fds from here on in
 | |
|               {
 | |
|                 if (verbose > 3)
 | |
|                   obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
 | |
|                                  << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
 | |
|                 if (metrics_p)
 | |
|                   inc_metric("fdcache_op_count","op","evict");
 | |
|                 unlink (j->fd.c_str());
 | |
|               }
 | |
| 
 | |
|             lru.erase(i, lru.end()); // erase the nodes generally
 | |
|             break;
 | |
|           }
 | |
|       }
 | |
|     total_fd = 0;
 | |
|     total_mb = 0.0;
 | |
|     for(auto i = prefetch.begin(); i < prefetch.end(); i++){
 | |
|       // accumulate totals from most recently used one going backward
 | |
|         total_fd ++;
 | |
|         total_mb += i->fd_size_mb;
 | |
|         if (total_fd > this->max_prefetch_fds || total_mb > this->max_prefetch_mbs)
 | |
|           {
 | |
|             // found the cut here point!
 | |
|             for (auto j = i; j < prefetch.end(); j++) // close all the fds from here on in
 | |
|               {
 | |
|                 if (verbose > 3)
 | |
|                   obatched(clog) << "fdcache evicted from prefetch a=" << j->archive << " b=" << j->entry
 | |
|                                  << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
 | |
|                 if (metrics_p)
 | |
|                   inc_metric("fdcache_op_count","op","prefetch_evict");
 | |
|                 unlink (j->fd.c_str());
 | |
|               }
 | |
| 
 | |
|             prefetch.erase(i, prefetch.end()); // erase the nodes generally
 | |
|             break;
 | |
|           }
 | |
|     }
 | |
|     if (metrics_p) set_metrics();
 | |
|   }
 | |
| 
 | |
| 
 | |
|   ~libarchive_fdcache()
 | |
|   {
 | |
|     // unlink any fdcache entries in $TMPDIR
 | |
|     // don't update metrics; those globals may be already destroyed
 | |
|     limit(0, 0, 0, 0, false);
 | |
|   }
 | |
| };
 | |
| static libarchive_fdcache fdcache;
 | |
| 
 | |
| 
 | |
| // For security/portability reasons, many distro-package archives have
 | |
| // a "./" in front of path names; others have nothing, others have
 | |
| // "/".  Canonicalize them all to a single leading "/", with the
 | |
| // assumption that this matches the dwarf-derived file names too.
 | |
| string canonicalized_archive_entry_pathname(struct archive_entry *e)
 | |
| {
 | |
|   string fn = archive_entry_pathname(e);
 | |
|   if (fn.size() == 0)
 | |
|     return fn;
 | |
|   if (fn[0] == '/')
 | |
|     return fn;
 | |
|   if (fn[0] == '.')
 | |
|     return fn.substr(1);
 | |
|   else
 | |
|     return string("/")+fn;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static struct MHD_Response*
 | |
| handle_buildid_r_match (bool internal_req_p,
 | |
|                         int64_t b_mtime,
 | |
|                         const string& b_source0,
 | |
|                         const string& b_source1,
 | |
|                         int *result_fd)
 | |
| {
 | |
|   struct stat fs;
 | |
|   int rc = stat (b_source0.c_str(), &fs);
 | |
|   if (rc != 0)
 | |
|     throw libc_exception (errno, string("stat ") + b_source0);
 | |
| 
 | |
|   if ((int64_t) fs.st_mtime != b_mtime)
 | |
|     {
 | |
|       if (verbose)
 | |
|         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
 | |
|       return 0;
 | |
|     }
 | |
| 
 | |
|   // check for a match in the fdcache first
 | |
|   int fd = fdcache.lookup(b_source0, b_source1);
 | |
|   while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
 | |
|     {
 | |
|       rc = fstat(fd, &fs);
 | |
|       if (rc < 0) // disappeared?
 | |
|         {
 | |
|           if (verbose)
 | |
|             obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
 | |
|           close(fd);
 | |
|           fdcache.clear(b_source0, b_source1);
 | |
|           break; // branch out of if "loop", to try new libarchive fetch attempt
 | |
|         }
 | |
| 
 | |
|       struct MHD_Response* r = MHD_create_response_from_fd (fs.st_size, fd);
 | |
|       if (r == 0)
 | |
|         {
 | |
|           if (verbose)
 | |
|             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
 | |
|           close(fd);
 | |
|           break; // branch out of if "loop", to try new libarchive fetch attempt
 | |
|         }
 | |
| 
 | |
|       inc_metric ("http_responses_total","result","archive fdcache");
 | |
| 
 | |
|       MHD_add_response_header (r, "Content-Type", "application/octet-stream");
 | |
|       MHD_add_response_header (r, "X-DEBUGINFOD-SIZE", to_string(fs.st_size).c_str());
 | |
|       MHD_add_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
 | |
|       MHD_add_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
 | |
|       add_mhd_last_modified (r, fs.st_mtime);
 | |
|       if (verbose > 1)
 | |
|         obatched(clog) << "serving fdcache archive " << b_source0 << " file " << b_source1 << endl;
 | |
|       /* libmicrohttpd will close it. */
 | |
|       if (result_fd)
 | |
|         *result_fd = fd;
 | |
|       return r;
 | |
|       // NB: see, we never go around the 'loop' more than once
 | |
|     }
 | |
| 
 | |
|   // no match ... grumble, must process the archive
 | |
|   string archive_decoder = "/dev/null";
 | |
|   string archive_extension = "";
 | |
|   for (auto&& arch : scan_archives)
 | |
|     if (string_endswith(b_source0, arch.first))
 | |
|       {
 | |
|         archive_extension = arch.first;
 | |
|         archive_decoder = arch.second;
 | |
|       }
 | |
|   FILE* fp;
 | |
|   defer_dtor<FILE*,int>::dtor_fn dfn;
 | |
|   if (archive_decoder != "cat")
 | |
|     {
 | |
|       string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
 | |
|       fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
 | |
|       dfn = pclose;
 | |
|       if (fp == NULL)
 | |
|         throw libc_exception (errno, string("popen ") + popen_cmd);
 | |
|     }
 | |
|   else
 | |
|     {
 | |
|       fp = fopen (b_source0.c_str(), "r");
 | |
|       dfn = fclose;
 | |
|       if (fp == NULL)
 | |
|         throw libc_exception (errno, string("fopen ") + b_source0);
 | |
|     }
 | |
|   defer_dtor<FILE*,int> fp_closer (fp, dfn);
 | |
| 
 | |
|   struct archive *a;
 | |
|   a = archive_read_new();
 | |
|   if (a == NULL)
 | |
|     throw archive_exception("cannot create archive reader");
 | |
|   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
 | |
| 
 | |
|   rc = archive_read_support_format_all(a);
 | |
|   if (rc != ARCHIVE_OK)
 | |
|     throw archive_exception(a, "cannot select all format");
 | |
|   rc = archive_read_support_filter_all(a);
 | |
|   if (rc != ARCHIVE_OK)
 | |
|     throw archive_exception(a, "cannot select all filters");
 | |
| 
 | |
|   rc = archive_read_open_FILE (a, fp);
 | |
|   if (rc != ARCHIVE_OK)
 | |
|     throw archive_exception(a, "cannot open archive from pipe");
 | |
| 
 | |
|   // archive traversal is in three stages, no, four stages:
 | |
|   // 1) skip entries whose names do not match the requested one
 | |
|   // 2) extract the matching entry name (set r = result)
 | |
|   // 3) extract some number of prefetched entries (just into fdcache)
 | |
|   // 4) abort any further processing
 | |
|   struct MHD_Response* r = 0;                 // will set in stage 2
 | |
|   unsigned prefetch_count =
 | |
|     internal_req_p ? 0 : fdcache_prefetch;    // will decrement in stage 3
 | |
| 
 | |
|   while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
 | |
|     {
 | |
|       if (interrupted)
 | |
|         break;
 | |
| 
 | |
|       struct archive_entry *e;
 | |
|       rc = archive_read_next_header (a, &e);
 | |
|       if (rc != ARCHIVE_OK)
 | |
|         break;
 | |
| 
 | |
|       if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
 | |
|         continue;
 | |
| 
 | |
|       string fn = canonicalized_archive_entry_pathname (e);
 | |
|       if ((r == 0) && (fn != b_source1)) // stage 1
 | |
|         continue;
 | |
| 
 | |
|       if (fdcache.probe (b_source0, fn)) // skip if already interned
 | |
|         continue;
 | |
| 
 | |
|       // extract this file to a temporary file
 | |
|       char* tmppath = NULL;
 | |
|       rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
 | |
|       if (rc < 0)
 | |
|         throw libc_exception (ENOMEM, "cannot allocate tmppath");
 | |
|       defer_dtor<void*,void> tmmpath_freer (tmppath, free);
 | |
|       fd = mkstemp (tmppath);
 | |
|       if (fd < 0)
 | |
|         throw libc_exception (errno, "cannot create temporary file");
 | |
|       // NB: don't unlink (tmppath), as fdcache will take charge of it.
 | |
| 
 | |
|       // NB: this can take many uninterruptible seconds for a huge file
 | |
|       rc = archive_read_data_into_fd (a, fd);
 | |
|       if (rc != ARCHIVE_OK) // e.g. ENOSPC!
 | |
|         {
 | |
|           close (fd);
 | |
|           unlink (tmppath);
 | |
|           throw archive_exception(a, "cannot extract file");
 | |
|         }
 | |
| 
 | |
|       // Set the mtime so the fdcache file mtimes, even prefetched ones,
 | |
|       // propagate to future webapi clients.
 | |
|       struct timeval tvs[2];
 | |
|       tvs[0].tv_sec = tvs[1].tv_sec = archive_entry_mtime(e);
 | |
|       tvs[0].tv_usec = tvs[1].tv_usec = 0;
 | |
|       (void) futimes (fd, tvs);  /* best effort */
 | |
| 
 | |
|       if (r != 0) // stage 3
 | |
|         {
 | |
|           // NB: now we know we have a complete reusable file; make fdcache
 | |
|           // responsible for unlinking it later.
 | |
|           fdcache.intern(b_source0, fn,
 | |
|                          tmppath, archive_entry_size(e),
 | |
|                          false); // prefetched ones go to the prefetch cache
 | |
|           prefetch_count --;
 | |
|           close (fd); // we're not saving this fd to make a mhd-response from!
 | |
|           continue;
 | |
|         }
 | |
| 
 | |
|       // NB: now we know we have a complete reusable file; make fdcache
 | |
|       // responsible for unlinking it later.
 | |
|       fdcache.intern(b_source0, b_source1,
 | |
|                      tmppath, archive_entry_size(e),
 | |
|                      true); // requested ones go to the front of lru
 | |
| 
 | |
|       inc_metric ("http_responses_total","result",archive_extension + " archive");
 | |
|       r = MHD_create_response_from_fd (archive_entry_size(e), fd);
 | |
|       if (r == 0)
 | |
|         {
 | |
|           if (verbose)
 | |
|             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
 | |
|           close(fd);
 | |
|           break; // assume no chance of better luck around another iteration; no other copies of same file
 | |
|         }
 | |
|       else
 | |
|         {
 | |
|           MHD_add_response_header (r, "Content-Type", "application/octet-stream");
 | |
|           std::string file = b_source1.substr(b_source1.find_last_of("/")+1, b_source1.length());
 | |
|           MHD_add_response_header (r, "X-DEBUGINFOD-SIZE", to_string(fs.st_size).c_str());
 | |
|           MHD_add_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
 | |
|           MHD_add_response_header (r, "X-DEBUGINFOD-FILE", file.c_str());
 | |
| 
 | |
|           add_mhd_last_modified (r, archive_entry_mtime(e));
 | |
|           if (verbose > 1)
 | |
|             obatched(clog) << "serving archive " << b_source0 << " file " << b_source1 << endl;
 | |
|           /* libmicrohttpd will close it. */
 | |
|           if (result_fd)
 | |
|             *result_fd = fd;
 | |
|           continue;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   // XXX: rpm/file not found: delete this R entry?
 | |
|   return r;
 | |
| }
 | |
| 
 | |
| 
 | |
| static struct MHD_Response*
 | |
| handle_buildid_match (bool internal_req_p,
 | |
|                       int64_t b_mtime,
 | |
|                       const string& b_stype,
 | |
|                       const string& b_source0,
 | |
|                       const string& b_source1,
 | |
|                       int *result_fd)
 | |
| {
 | |
|   try
 | |
|     {
 | |
|       if (b_stype == "F")
 | |
|         return handle_buildid_f_match(internal_req_p, b_mtime, b_source0, result_fd);
 | |
|       else if (b_stype == "R")
 | |
|         return handle_buildid_r_match(internal_req_p, b_mtime, b_source0, b_source1, result_fd);
 | |
|     }
 | |
|   catch (const reportable_exception &e)
 | |
|     {
 | |
|       e.report(clog);
 | |
|       // Report but swallow libc etc. errors here; let the caller
 | |
|       // iterate to other matches of the content.
 | |
|     }
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| debuginfod_find_progress (debuginfod_client *, long a, long b)
 | |
| {
 | |
|   if (verbose > 4)
 | |
|     obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
 | |
| 
 | |
|   return interrupted;
 | |
| }
 | |
| 
 | |
| 
 | |
// a little lru pool of debuginfod_client*s for reuse between query threads

// Guards every access to dc_pool.
mutex dc_pool_lock;
// Idle clients; the front holds the most recently returned one.
deque<debuginfod_client*> dc_pool;
 | |
| 
 | |
| debuginfod_client* debuginfod_pool_begin()
 | |
| {
 | |
|   unique_lock<mutex> lock(dc_pool_lock);
 | |
|   if (dc_pool.size() > 0)
 | |
|     {
 | |
|       inc_metric("dc_pool_op_count","op","begin-reuse");
 | |
|       debuginfod_client *c = dc_pool.front();
 | |
|       dc_pool.pop_front();
 | |
|       return c;
 | |
|     }
 | |
|   inc_metric("dc_pool_op_count","op","begin-new");
 | |
|   return debuginfod_begin();
 | |
| }
 | |
| 
 | |
| 
 | |
| void debuginfod_pool_groom()
 | |
| {
 | |
|   unique_lock<mutex> lock(dc_pool_lock);
 | |
|   while (dc_pool.size() > 0)
 | |
|     {
 | |
|       inc_metric("dc_pool_op_count","op","end");
 | |
|       debuginfod_end(dc_pool.front());
 | |
|       dc_pool.pop_front();
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| void debuginfod_pool_end(debuginfod_client* c)
 | |
| {
 | |
|   unique_lock<mutex> lock(dc_pool_lock);
 | |
|   inc_metric("dc_pool_op_count","op","end-save");
 | |
|   dc_pool.push_front(c); // accelerate reuse, vs. push_back
 | |
| }
 | |
| 
 | |
| 
 | |
| static struct MHD_Response*
 | |
| handle_buildid (MHD_Connection* conn,
 | |
|                 const string& buildid /* unsafe */,
 | |
|                 string& artifacttype /* unsafe, cleanse on exception/return */,
 | |
|                 const string& suffix /* unsafe */,
 | |
|                 int *result_fd)
 | |
| {
 | |
|   // validate artifacttype
 | |
|   string atype_code;
 | |
|   if (artifacttype == "debuginfo") atype_code = "D";
 | |
|   else if (artifacttype == "executable") atype_code = "E";
 | |
|   else if (artifacttype == "source") atype_code = "S";
 | |
|   else {
 | |
|     artifacttype = "invalid"; // PR28242 ensure http_resposes metrics don't propagate unclean user data 
 | |
|     throw reportable_exception("invalid artifacttype");
 | |
|   }
 | |
| 
 | |
|   inc_metric("http_requests_total", "type", artifacttype);
 | |
|   
 | |
|   if (atype_code == "S" && suffix == "")
 | |
|      throw reportable_exception("invalid source suffix");
 | |
| 
 | |
|   // validate buildid
 | |
|   if ((buildid.size() < 2) || // not empty
 | |
|       (buildid.size() % 2) || // even number
 | |
|       (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
 | |
|     throw reportable_exception("invalid buildid");
 | |
| 
 | |
|   if (verbose > 1)
 | |
|     obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
 | |
|          << " suffix=" << suffix << endl;
 | |
| 
 | |
|   // If invoked from the scanner threads, use the scanners' read-write
 | |
|   // connection.  Otherwise use the web query threads' read-only connection.
 | |
|   sqlite3 *thisdb = (conn == 0) ? db : dbq;
 | |
| 
 | |
|   sqlite_ps *pp = 0;
 | |
| 
 | |
|   if (atype_code == "D")
 | |
|     {
 | |
|       pp = new sqlite_ps (thisdb, "mhd-query-d",
 | |
|                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
 | |
|                           "order by mtime desc");
 | |
|       pp->reset();
 | |
|       pp->bind(1, buildid);
 | |
|     }
 | |
|   else if (atype_code == "E")
 | |
|     {
 | |
|       pp = new sqlite_ps (thisdb, "mhd-query-e",
 | |
|                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
 | |
|                           "order by mtime desc");
 | |
|       pp->reset();
 | |
|       pp->bind(1, buildid);
 | |
|     }
 | |
|   else if (atype_code == "S")
 | |
|     {
 | |
|       // PR25548
 | |
|       // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
 | |
|       // We let the query pass with either one.
 | |
| 
 | |
|       pp = new sqlite_ps (thisdb, "mhd-query-s",
 | |
|                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
 | |
|                           "order by sharedprefix(source0,source0ref) desc, mtime desc");
 | |
|       pp->reset();
 | |
|       pp->bind(1, buildid);
 | |
|       // NB: we don't store the non-canonicalized path names any more, but old databases
 | |
|       // might have them (and no canon ones), so we keep searching for both.
 | |
|       pp->bind(2, suffix);
 | |
|       pp->bind(3, canon_pathname(suffix));
 | |
|     }
 | |
|   unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
 | |
| 
 | |
|   // consume all the rows
 | |
|   while (1)
 | |
|     {
 | |
|       int rc = pp->step();
 | |
|       if (rc == SQLITE_DONE) break;
 | |
|       if (rc != SQLITE_ROW)
 | |
|         throw sqlite_exception(rc, "step");
 | |
| 
 | |
|       int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
 | |
|       string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
 | |
|       string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
 | |
|       string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
 | |
| 
 | |
|       if (verbose > 1)
 | |
|         obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
 | |
|              << " source0=" << b_source0 << " source1=" << b_source1 << endl;
 | |
| 
 | |
|       // Try accessing the located match.
 | |
|       // XXX: in case of multiple matches, attempt them in parallel?
 | |
|       auto r = handle_buildid_match (conn ? false : true,
 | |
|                                      b_mtime, b_stype, b_source0, b_source1, result_fd);
 | |
|       if (r)
 | |
|         return r;
 | |
|     }
 | |
|   pp->reset();
 | |
| 
 | |
|   // We couldn't find it in the database.  Last ditch effort
 | |
|   // is to defer to other debuginfo servers.
 | |
| 
 | |
|   int fd = -1;
 | |
|   debuginfod_client *client = debuginfod_pool_begin ();
 | |
|   if (client != NULL)
 | |
|     {
 | |
|       debuginfod_set_progressfn (client, & debuginfod_find_progress);
 | |
| 
 | |
|       if (conn)
 | |
|         {
 | |
|           // Transcribe incoming User-Agent:
 | |
|           string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
 | |
|           string ua_complete = string("User-Agent: ") + ua;
 | |
|           debuginfod_add_http_header (client, ua_complete.c_str());
 | |
| 
 | |
|           // Compute larger XFF:, for avoiding info loss during
 | |
|           // federation, and for future cyclicity detection.
 | |
|           string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
 | |
|           if (xff != "")
 | |
|             xff += string(", "); // comma separated list
 | |
| 
 | |
|           unsigned int xff_count = 0;
 | |
|           for (auto&& i : xff){
 | |
|             if (i == ',') xff_count++;
 | |
|           }
 | |
| 
 | |
|           // if X-Forwarded-For: exceeds N hops,
 | |
|           // do not delegate a local lookup miss to upstream debuginfods.
 | |
|           if (xff_count >= forwarded_ttl_limit)
 | |
|             throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
 | |
| and will not query the upstream servers");
 | |
| 
 | |
|           // Compute the client's numeric IP address only - so can't merge with conninfo()
 | |
|           const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
 | |
|                                                                        MHD_CONNECTION_INFO_CLIENT_ADDRESS);
 | |
|           struct sockaddr *so = u ? u->client_addr : 0;
 | |
|           char hostname[256] = ""; // RFC1035
 | |
|           if (so && so->sa_family == AF_INET)
 | |
|             (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
 | |
|                                 NI_NUMERICHOST);
 | |
|           else if (so && so->sa_family == AF_INET6)
 | |
|             (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
 | |
|                                 NI_NUMERICHOST);
 | |
| 
 | |
|           string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
 | |
|           debuginfod_add_http_header (client, xff_complete.c_str());
 | |
|         }
 | |
| 
 | |
|       if (artifacttype == "debuginfo")
 | |
| 	fd = debuginfod_find_debuginfo (client,
 | |
| 					(const unsigned char*) buildid.c_str(),
 | |
| 					0, NULL);
 | |
|       else if (artifacttype == "executable")
 | |
| 	fd = debuginfod_find_executable (client,
 | |
| 					 (const unsigned char*) buildid.c_str(),
 | |
| 					 0, NULL);
 | |
|       else if (artifacttype == "source")
 | |
| 	fd = debuginfod_find_source (client,
 | |
| 				     (const unsigned char*) buildid.c_str(),
 | |
| 				     0, suffix.c_str(), NULL);
 | |
|     }
 | |
|   else
 | |
|     fd = -errno; /* Set by debuginfod_begin.  */
 | |
|   debuginfod_pool_end (client);
 | |
| 
 | |
|   if (fd >= 0)
 | |
|     {
 | |
|       inc_metric ("http_responses_total","result","upstream");
 | |
|       struct stat s;
 | |
|       int rc = fstat (fd, &s);
 | |
|       if (rc == 0)
 | |
|         {
 | |
|           auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
 | |
|           if (r)
 | |
|             {
 | |
|               MHD_add_response_header (r, "Content-Type", "application/octet-stream");
 | |
|               add_mhd_last_modified (r, s.st_mtime);
 | |
|               if (verbose > 1)
 | |
|                 obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
 | |
|               if (result_fd)
 | |
|                 *result_fd = fd;
 | |
|               return r; // NB: don't close fd; libmicrohttpd will
 | |
|             }
 | |
|         }
 | |
|       close (fd);
 | |
|     }
 | |
|   else
 | |
|     switch(fd)
 | |
|       {
 | |
|       case -ENOSYS:
 | |
|         break;
 | |
|       case -ENOENT:
 | |
|         break;
 | |
|       default: // some more tricky error
 | |
|         throw libc_exception(-fd, "upstream debuginfod query failed");
 | |
|       }
 | |
| 
 | |
|   throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
 | |
| }
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
// Maps a fully rendered prometheus metric key (name plus any {label} part)
// to its current value.
static map<string,double> metrics; // arbitrary data for /metrics query
// NB: values are stored as double, which prometheus accepts; some of them
// (e.g. the duration sums) are not whole numbers.
// Guards every access to the metrics map.
static mutex metrics_lock;
// NB: these objects get released during the process exit via global dtors
// do not call them from within other global dtors
 | |
| 
 | |
// Build one prometheus-style label of the form name="escaped-value".
// Inside the value, backslash, double-quote and newline are escaped as
// \\, \" and \n respectively; every other character is copied verbatim.
// The name is emitted unchanged.
// https://prometheus.io/docs/instrumenting/exposition_formats/
static string
metric_label(const string& name, const string& value)
{
  string label = name;
  label += "=\"";

  for (string::size_type pos = 0; pos < value.size(); pos++)
    {
      const char ch = value[pos];
      if (ch == '\\')
        label += "\\\\";
      else if (ch == '\"')
        label += "\\\"";
      else if (ch == '\n')
        label += "\\n";
      else
        label += ch;
    }

  label += '\"';
  return label;
}
 | |
| 
 | |
| 
 | |
| // add prometheus-format metric name + label tuple (if any) + value
 | |
| 
 | |
| static void
 | |
| set_metric(const string& metric, double value)
 | |
| {
 | |
|   unique_lock<mutex> lock(metrics_lock);
 | |
|   metrics[metric] = value;
 | |
| }
 | |
#if 0 /* unused */
// Compiled out: would add one to the unlabelled metric under the
// metrics lock.
static void
inc_metric(const string& metric)
{
  unique_lock<mutex> lock(metrics_lock);
  metrics[metric] ++;
}
#endif
 | |
| static void
 | |
| set_metric(const string& metric,
 | |
|            const string& lname, const string& lvalue,
 | |
|            double value)
 | |
| {
 | |
|   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
 | |
|   unique_lock<mutex> lock(metrics_lock);
 | |
|   metrics[key] = value;
 | |
| }
 | |
| 
 | |
| static void
 | |
| inc_metric(const string& metric,
 | |
|            const string& lname, const string& lvalue)
 | |
| {
 | |
|   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
 | |
|   unique_lock<mutex> lock(metrics_lock);
 | |
|   metrics[key] ++;
 | |
| }
 | |
| static void
 | |
| add_metric(const string& metric,
 | |
|            const string& lname, const string& lvalue,
 | |
|            double value)
 | |
| {
 | |
|   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
 | |
|   unique_lock<mutex> lock(metrics_lock);
 | |
|   metrics[key] += value;
 | |
| }
 | |
#if 0
// Compiled out: would add value to the unlabelled metric under the
// metrics lock.
static void
add_metric(const string& metric,
           double value)
{
  unique_lock<mutex> lock(metrics_lock);
  metrics[metric] += value;
}
#endif
 | |
| 
 | |
| 
 | |
| // and more for higher arity labels if needed
 | |
| 
 | |
| static void
 | |
| inc_metric(const string& metric,
 | |
|            const string& lname, const string& lvalue,
 | |
|            const string& rname, const string& rvalue)
 | |
| {
 | |
|   string key = (metric + "{"
 | |
|                 + metric_label(lname, lvalue) + ","
 | |
|                 + metric_label(rname, rvalue) + "}");
 | |
|   unique_lock<mutex> lock(metrics_lock);
 | |
|   metrics[key] ++;
 | |
| }
 | |
| static void
 | |
| add_metric(const string& metric,
 | |
|            const string& lname, const string& lvalue,
 | |
|            const string& rname, const string& rvalue,
 | |
|            double value)
 | |
| {
 | |
|   string key = (metric + "{"
 | |
|                 + metric_label(lname, lvalue) + ","
 | |
|                 + metric_label(rname, rvalue) + "}");
 | |
|   unique_lock<mutex> lock(metrics_lock);
 | |
|   metrics[key] += value;
 | |
| }
 | |
| 
 | |
| static struct MHD_Response*
 | |
| handle_metrics (off_t* size)
 | |
| {
 | |
|   stringstream o;
 | |
|   {
 | |
|     unique_lock<mutex> lock(metrics_lock);
 | |
|     for (auto&& i : metrics)
 | |
|       o << i.first
 | |
|         << " "
 | |
|         << std::setprecision(std::numeric_limits<double>::digits10 + 1)
 | |
|         << i.second
 | |
|         << endl;
 | |
|   }
 | |
|   const string& os = o.str();
 | |
|   MHD_Response* r = MHD_create_response_from_buffer (os.size(),
 | |
|                                                      (void*) os.c_str(),
 | |
|                                                      MHD_RESPMEM_MUST_COPY);
 | |
|   *size = os.size();
 | |
|   MHD_add_response_header (r, "Content-Type", "text/plain");
 | |
|   return r;
 | |
| }
 | |
| 
 | |
| static struct MHD_Response*
 | |
| handle_root (off_t* size)
 | |
| {
 | |
|   static string version = "debuginfod (" + string (PACKAGE_NAME) + ") "
 | |
| 			  + string (PACKAGE_VERSION);
 | |
|   MHD_Response* r = MHD_create_response_from_buffer (version.size (),
 | |
| 						     (void *) version.c_str (),
 | |
| 						     MHD_RESPMEM_PERSISTENT);
 | |
|   *size = version.size ();
 | |
|   MHD_add_response_header (r, "Content-Type", "text/plain");
 | |
|   return r;
 | |
| }
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| /* libmicrohttpd callback */
 | |
| static MHD_RESULT
 | |
| handler_cb (void * /*cls*/,
 | |
|             struct MHD_Connection *connection,
 | |
|             const char *url,
 | |
|             const char *method,
 | |
|             const char * /*version*/,
 | |
|             const char * /*upload_data*/,
 | |
|             size_t * /*upload_data_size*/,
 | |
|             void ** ptr)
 | |
| {
 | |
|   struct MHD_Response *r = NULL;
 | |
|   string url_copy = url;
 | |
| 
 | |
|   /* libmicrohttpd always makes (at least) two callbacks: once just
 | |
|      past the headers, and one after the request body is finished
 | |
|      being received.  If we process things early (first callback) and
 | |
|      queue a response, libmicrohttpd would suppress http keep-alive
 | |
|      (via connection->read_closed = true). */
 | |
|   static int aptr; /* just some random object to use as a flag */
 | |
|   if (&aptr != *ptr)
 | |
|     {
 | |
|       /* do never respond on first call */
 | |
|       *ptr = &aptr;
 | |
|       return MHD_YES;
 | |
|     }
 | |
|   *ptr = NULL;                     /* reset when done */
 | |
|   
 | |
|   const char *maxsize_string = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "X-DEBUGINFOD-MAXSIZE");
 | |
|   long maxsize = 0;
 | |
|   if (maxsize_string != NULL && maxsize_string[0] != '\0')
 | |
|     maxsize = atol(maxsize_string);
 | |
|   else
 | |
|     maxsize = 0;
 | |
| 
 | |
| #if MHD_VERSION >= 0x00097002
 | |
|   enum MHD_Result rc;
 | |
| #else
 | |
|   int rc = MHD_NO; // mhd
 | |
| #endif
 | |
|   int http_code = 500;
 | |
|   off_t http_size = -1;
 | |
|   struct timespec ts_start, ts_end;
 | |
|   clock_gettime (CLOCK_MONOTONIC, &ts_start);
 | |
|   double afteryou = 0.0;
 | |
|   string artifacttype, suffix;
 | |
| 
 | |
|   try
 | |
|     {
 | |
|       if (string(method) != "GET")
 | |
|         throw reportable_exception(400, "we support GET only");
 | |
| 
 | |
|       /* Start decoding the URL. */
 | |
|       size_t slash1 = url_copy.find('/', 1);
 | |
|       string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
 | |
| 
 | |
|       if (slash1 != string::npos && url1 == "/buildid")
 | |
|         {
 | |
|           // PR27863: block this thread awhile if another thread is already busy
 | |
|           // fetching the exact same thing.  This is better for Everyone.
 | |
|           // The latecomer says "... after you!" and waits.
 | |
|           add_metric ("thread_busy", "role", "http-buildid-after-you", 1);
 | |
| #ifdef HAVE_PTHREAD_SETNAME_NP
 | |
|           (void) pthread_setname_np (pthread_self(), "mhd-buildid-after-you");
 | |
| #endif
 | |
|           struct timespec tsay_start, tsay_end;
 | |
|           clock_gettime (CLOCK_MONOTONIC, &tsay_start);
 | |
|           static unique_set<string> busy_urls;
 | |
|           unique_set_reserver<string> after_you(busy_urls, url_copy);
 | |
|           clock_gettime (CLOCK_MONOTONIC, &tsay_end);
 | |
|           afteryou = (tsay_end.tv_sec - tsay_start.tv_sec) + (tsay_end.tv_nsec - tsay_start.tv_nsec)/1.e9;
 | |
|           add_metric ("thread_busy", "role", "http-buildid-after-you", -1);
 | |
|           
 | |
|           tmp_inc_metric m ("thread_busy", "role", "http-buildid");
 | |
| #ifdef HAVE_PTHREAD_SETNAME_NP
 | |
|           (void) pthread_setname_np (pthread_self(), "mhd-buildid");
 | |
| #endif
 | |
|           size_t slash2 = url_copy.find('/', slash1+1);
 | |
|           if (slash2 == string::npos)
 | |
|             throw reportable_exception("/buildid/ webapi error, need buildid");
 | |
| 
 | |
|           string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
 | |
| 
 | |
|           size_t slash3 = url_copy.find('/', slash2+1);
 | |
| 
 | |
|           if (slash3 == string::npos)
 | |
|             {
 | |
|               artifacttype = url_copy.substr(slash2+1);
 | |
|               suffix = "";
 | |
|             }
 | |
|           else
 | |
|             {
 | |
|               artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
 | |
|               suffix = url_copy.substr(slash3); // include the slash in the suffix
 | |
|             }
 | |
| 
 | |
|           // get the resulting fd so we can report its size
 | |
|           int fd;
 | |
|           r = handle_buildid(connection, buildid, artifacttype, suffix, &fd);
 | |
|           if (r)
 | |
|             {
 | |
|               struct stat fs;
 | |
|               if (fstat(fd, &fs) == 0)
 | |
|                 http_size = fs.st_size;
 | |
|               // libmicrohttpd will close (fd);
 | |
|             }
 | |
|         }
 | |
|       else if (url1 == "/metrics")
 | |
|         {
 | |
|           tmp_inc_metric m ("thread_busy", "role", "http-metrics");
 | |
|           artifacttype = "metrics";
 | |
|           inc_metric("http_requests_total", "type", artifacttype);
 | |
|           r = handle_metrics(& http_size);
 | |
|         }
 | |
|       else if (url1 == "/")
 | |
|         {
 | |
|           artifacttype = "/";
 | |
|           inc_metric("http_requests_total", "type", artifacttype);
 | |
|           r = handle_root(& http_size);
 | |
|         }
 | |
|       else
 | |
|         throw reportable_exception("webapi error, unrecognized '" + url1 + "'");
 | |
| 
 | |
|       if (r == 0)
 | |
|         throw reportable_exception("internal error, missing response");
 | |
| 
 | |
|       if (maxsize > 0 && http_size > maxsize)
 | |
|         {
 | |
|           MHD_destroy_response(r);
 | |
|           throw reportable_exception(406, "File too large, max size=" + std::to_string(maxsize));
 | |
|         }
 | |
| 
 | |
|       rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
 | |
|       http_code = MHD_HTTP_OK;
 | |
|       MHD_destroy_response (r);
 | |
|     }
 | |
|   catch (const reportable_exception& e)
 | |
|     {
 | |
|       inc_metric("http_responses_total","result","error");
 | |
|       e.report(clog);
 | |
|       http_code = e.code;
 | |
|       http_size = e.message.size();
 | |
|       rc = e.mhd_send_response (connection);
 | |
|     }
 | |
| 
 | |
|   clock_gettime (CLOCK_MONOTONIC, &ts_end);
 | |
|   double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
 | |
|   // afteryou: delay waiting for other client's identical query to complete
 | |
|   // deltas: total latency, including afteryou waiting
 | |
|   obatched(clog) << conninfo(connection)
 | |
|                  << ' ' << method << ' ' << url
 | |
|                  << ' ' << http_code << ' ' << http_size
 | |
|                  << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
 | |
|                  << endl;
 | |
| 
 | |
|   // related prometheus metrics
 | |
|   string http_code_str = to_string(http_code);
 | |
|   add_metric("http_responses_transfer_bytes_sum",
 | |
|              "code", http_code_str, "type", artifacttype, http_size);
 | |
|   inc_metric("http_responses_transfer_bytes_count",
 | |
|              "code", http_code_str, "type", artifacttype);
 | |
| 
 | |
|   add_metric("http_responses_duration_milliseconds_sum",
 | |
|              "code", http_code_str, "type", artifacttype, deltas*1000); // prometheus prefers _seconds and floating point
 | |
|   inc_metric("http_responses_duration_milliseconds_count",
 | |
|              "code", http_code_str, "type", artifacttype);
 | |
| 
 | |
|   add_metric("http_responses_after_you_milliseconds_sum",
 | |
|              "code", http_code_str, "type", artifacttype, afteryou*1000);
 | |
|   inc_metric("http_responses_after_you_milliseconds_count",
 | |
|              "code", http_code_str, "type", artifacttype);
 | |
| 
 | |
|   return rc;
 | |
| }
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| // borrowed originally from src/nm.c get_local_names()
 | |
| 
 | |
| static void
 | |
| dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
 | |
|   noexcept // no exceptions - so we can simplify the altdbg resource release at end
 | |
| {
 | |
|   Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
 | |
|   if (dbg == NULL)
 | |
|     return;
 | |
| 
 | |
|   Dwarf* altdbg = NULL;
 | |
|   int    altdbg_fd = -1;
 | |
| 
 | |
|   // DWZ handling: if we have an unsatisfied debug-alt-link, add an
 | |
|   // empty string into the outgoing sourcefiles set, so the caller
 | |
|   // should know that our data is incomplete.
 | |
|   const char *alt_name_p;
 | |
|   const void *alt_build_id; // elfutils-owned memory
 | |
|   ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
 | |
|   if (sz > 0) // got one!
 | |
|     {
 | |
|       string buildid;
 | |
|       unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
 | |
|       for (ssize_t idx=0; idx<sz; idx++)
 | |
|         {
 | |
|           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
 | |
|           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
 | |
|         }
 | |
| 
 | |
|       if (verbose > 3)
 | |
|         obatched(clog) << "Need altdebug buildid=" << buildid << endl;
 | |
| 
 | |
|       // but is it unsatisfied the normal elfutils ways?
 | |
|       Dwarf* alt = dwarf_getalt (dbg);
 | |
|       if (alt == NULL)
 | |
|         {
 | |
|           // Yup, unsatisfied the normal way.  Maybe we can satisfy it
 | |
|           // from our own debuginfod database.
 | |
|           int alt_fd;
 | |
|           struct MHD_Response *r = 0;
 | |
|           try
 | |
|             {
 | |
|               string artifacttype = "debuginfo";
 | |
|               r = handle_buildid (0, buildid, artifacttype, "", &alt_fd);
 | |
|             }
 | |
|           catch (const reportable_exception& e)
 | |
|             {
 | |
|               // swallow exceptions
 | |
|             }
 | |
| 
 | |
|           // NB: this is not actually recursive!  This invokes the web-query
 | |
|           // path, which cannot get back into the scan code paths.
 | |
|           if (r)
 | |
|             {
 | |
|               // Found it!
 | |
|               altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
 | |
|               alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
 | |
|               // NB: must close this dwarf and this fd at the bottom of the function!
 | |
|               MHD_destroy_response (r); // will close alt_fd
 | |
|               if (alt)
 | |
|                 dwarf_setalt (dbg, alt);
 | |
|             }
 | |
|         }
 | |
|       else
 | |
|         {
 | |
|           // NB: dwarf_setalt(alt) inappropriate - already done!
 | |
|           // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
 | |
|         }
 | |
| 
 | |
|       if (alt)
 | |
|         {
 | |
|           if (verbose > 3)
 | |
|             obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
 | |
|         }
 | |
|       else // (alt == NULL) - signal possible presence of poor debuginfo
 | |
|         {
 | |
|           debug_sourcefiles.insert("");
 | |
|           if (verbose > 3)
 | |
|             obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   Dwarf_Off offset = 0;
 | |
|   Dwarf_Off old_offset;
 | |
|   size_t hsize;
 | |
| 
 | |
|   while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
 | |
|     {
 | |
|       Dwarf_Die cudie_mem;
 | |
|       Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
 | |
| 
 | |
|       if (cudie == NULL)
 | |
|         continue;
 | |
|       if (dwarf_tag (cudie) != DW_TAG_compile_unit)
 | |
|         continue;
 | |
| 
 | |
|       const char *cuname = dwarf_diename(cudie) ?: "unknown";
 | |
| 
 | |
|       Dwarf_Files *files;
 | |
|       size_t nfiles;
 | |
|       if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
 | |
|         continue;
 | |
| 
 | |
|       // extract DW_AT_comp_dir to resolve relative file names
 | |
|       const char *comp_dir = "";
 | |
|       const char *const *dirs;
 | |
|       size_t ndirs;
 | |
|       if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
 | |
|           dirs[0] != NULL)
 | |
|         comp_dir = dirs[0];
 | |
|       if (comp_dir == NULL)
 | |
|         comp_dir = "";
 | |
| 
 | |
|       if (verbose > 3)
 | |
|         obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
 | |
|                        << " #files=" << nfiles << " #dirs=" << ndirs << endl;
 | |
| 
 | |
|       if (comp_dir[0] == '\0' && cuname[0] != '/')
 | |
|         {
 | |
|           // This is a common symptom for dwz-compressed debug files,
 | |
|           // where the altdebug file cannot be resolved.
 | |
|           if (verbose > 3)
 | |
|             obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
 | |
|           continue;
 | |
|         }
 | |
| 
 | |
|       for (size_t f = 1; f < nfiles; f++)
 | |
|         {
 | |
|           const char *hat = dwarf_filesrc (files, f, NULL, NULL);
 | |
|           if (hat == NULL)
 | |
|             continue;
 | |
| 
 | |
|           if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
 | |
|             continue;
 | |
| 
 | |
|           string waldo;
 | |
|           if (hat[0] == '/') // absolute
 | |
|             waldo = (string (hat));
 | |
|           else if (comp_dir[0] != '\0') // comp_dir relative
 | |
|             waldo = (string (comp_dir) + string("/") + string (hat));
 | |
|           else
 | |
|            {
 | |
|              if (verbose > 3)
 | |
|                obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
 | |
|              continue;
 | |
|            }
 | |
| 
 | |
|           // NB: this is the 'waldo' that a dbginfo client will have
 | |
|           // to supply for us to give them the file The comp_dir
 | |
|           // prefixing is a definite complication.  Otherwise we'd
 | |
|           // have to return a setof comp_dirs (one per CU!) with
 | |
|           // corresponding filesrc[] names, instead of one absolute
 | |
|           // resoved set.  Maybe we'll have to do that anyway.  XXX
 | |
| 
 | |
|           if (verbose > 4)
 | |
|             obatched(clog) << waldo
 | |
|                            << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") <<  endl;
 | |
| 
 | |
|           debug_sourcefiles.insert (waldo);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   dwarf_end(dbg);
 | |
|   if (altdbg)
 | |
|     dwarf_end(altdbg);
 | |
|   if (altdbg_fd >= 0)
 | |
|     close(altdbg_fd);
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static void
 | |
| elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
 | |
| {
 | |
|   Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
 | |
|   if (elf == NULL)
 | |
|     return;
 | |
| 
 | |
|   try // catch our types of errors and clean up the Elf* object
 | |
|     {
 | |
|       if (elf_kind (elf) != ELF_K_ELF)
 | |
|         {
 | |
|           elf_end (elf);
 | |
|           return;
 | |
|         }
 | |
| 
 | |
|       GElf_Ehdr ehdr_storage;
 | |
|       GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
 | |
|       if (ehdr == NULL)
 | |
|         {
 | |
|           elf_end (elf);
 | |
|           return;
 | |
|         }
 | |
|       auto elf_type = ehdr->e_type;
 | |
| 
 | |
|       const void *build_id; // elfutils-owned memory
 | |
|       ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
 | |
|       if (sz <= 0)
 | |
|         {
 | |
|           // It's not a diagnostic-worthy error for an elf file to lack build-id.
 | |
|           // It might just be very old.
 | |
|           elf_end (elf);
 | |
|           return;
 | |
|         }
 | |
| 
 | |
|       // build_id is a raw byte array; convert to hexadecimal *lowercase*
 | |
|       unsigned char* build_id_bytes = (unsigned char*) build_id;
 | |
|       for (ssize_t idx=0; idx<sz; idx++)
 | |
|         {
 | |
|           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
 | |
|           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
 | |
|         }
 | |
| 
 | |
|       // now decide whether it's an executable - namely, any allocatable section has
 | |
|       // PROGBITS;
 | |
|       if (elf_type == ET_EXEC || elf_type == ET_DYN)
 | |
|         {
 | |
|           size_t shnum;
 | |
|           int rc = elf_getshdrnum (elf, &shnum);
 | |
|           if (rc < 0)
 | |
|             throw elfutils_exception(rc, "getshdrnum");
 | |
| 
 | |
|           executable_p = false;
 | |
|           for (size_t sc = 0; sc < shnum; sc++)
 | |
|             {
 | |
|               Elf_Scn *scn = elf_getscn (elf, sc);
 | |
|               if (scn == NULL)
 | |
|                 continue;
 | |
| 
 | |
|               GElf_Shdr shdr_mem;
 | |
|               GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
 | |
|               if (shdr == NULL)
 | |
|                 continue;
 | |
| 
 | |
|               // allocated (loadable / vm-addr-assigned) section with available content?
 | |
|               if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
 | |
|                 {
 | |
|                   if (verbose > 4)
 | |
|                     obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
 | |
|                   executable_p = true;
 | |
|                   break; // no need to keep looking for others
 | |
|                 }
 | |
|             } // iterate over sections
 | |
|         } // executable_p classification
 | |
| 
 | |
|       // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
 | |
|       // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
 | |
|       size_t shstrndx;
 | |
|       int rc = elf_getshdrstrndx (elf, &shstrndx);
 | |
|       if (rc < 0)
 | |
|         throw elfutils_exception(rc, "getshdrstrndx");
 | |
| 
 | |
|       Elf_Scn *scn = NULL;
 | |
|       bool symtab_p = false;
 | |
|       bool bits_alloc_p = false;
 | |
|       while (true)
 | |
|         {
 | |
|           scn = elf_nextscn (elf, scn);
 | |
|           if (scn == NULL)
 | |
|             break;
 | |
|           GElf_Shdr shdr_storage;
 | |
|           GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
 | |
|           if (shdr == NULL)
 | |
|             break;
 | |
|           const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
 | |
|           if (section_name == NULL)
 | |
|             break;
 | |
|           if (startswith (section_name, ".debug_line") ||
 | |
|               startswith (section_name, ".zdebug_line"))
 | |
|             {
 | |
|               debuginfo_p = true;
 | |
|               dwarf_extract_source_paths (elf, debug_sourcefiles);
 | |
|               break; // expecting only one .*debug_line, so no need to look for others
 | |
|             }
 | |
|           else if (startswith (section_name, ".debug_") ||
 | |
|                    startswith (section_name, ".zdebug_"))
 | |
|             {
 | |
|               debuginfo_p = true;
 | |
|               // NB: don't break; need to parse .debug_line for sources
 | |
|             }
 | |
|           else if (shdr->sh_type == SHT_SYMTAB)
 | |
|             {
 | |
|               symtab_p = true;
 | |
|             }
 | |
|           else if (shdr->sh_type != SHT_NOBITS
 | |
|                    && shdr->sh_type != SHT_NOTE
 | |
|                    && (shdr->sh_flags & SHF_ALLOC) != 0)
 | |
|             {
 | |
|               bits_alloc_p = true;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|       // For more expansive elf/split-debuginfo classification, we
 | |
|       // want to identify as debuginfo "strip -s"-produced files
 | |
|       // without .debug_info* (like libicudata), but we don't want to
 | |
|       // identify "strip -g" executables (with .symtab left there).
 | |
|       if (symtab_p && !bits_alloc_p)
 | |
|         debuginfo_p = true;
 | |
|     }
 | |
|   catch (const reportable_exception& e)
 | |
|     {
 | |
|       e.report(clog);
 | |
|     }
 | |
|   elf_end (elf);
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| scan_source_file (const string& rps, const stat_t& st,
 | |
|                   sqlite_ps& ps_upsert_buildids,
 | |
|                   sqlite_ps& ps_upsert_files,
 | |
|                   sqlite_ps& ps_upsert_de,
 | |
|                   sqlite_ps& ps_upsert_s,
 | |
|                   sqlite_ps& ps_query,
 | |
|                   sqlite_ps& ps_scan_done,
 | |
|                   unsigned& fts_cached,
 | |
|                   unsigned& fts_executable,
 | |
|                   unsigned& fts_debuginfo,
 | |
|                   unsigned& fts_sourcefiles)
 | |
| {
 | |
|   /* See if we know of it already. */
 | |
|   int rc = ps_query
 | |
|     .reset()
 | |
|     .bind(1, rps)
 | |
|     .bind(2, st.st_mtime)
 | |
|     .step();
 | |
|   ps_query.reset();
 | |
|   if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
 | |
|     // no need to recheck a file/version we already know
 | |
|     // specifically, no need to elf-begin a file we already determined is non-elf
 | |
|     // (so is stored with buildid=NULL)
 | |
|     {
 | |
|       fts_cached++;
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|   bool executable_p = false, debuginfo_p = false; // E and/or D
 | |
|   string buildid;
 | |
|   set<string> sourcefiles;
 | |
| 
 | |
|   int fd = open (rps.c_str(), O_RDONLY);
 | |
|   try
 | |
|     {
 | |
|       if (fd >= 0)
 | |
|         elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
 | |
|       else
 | |
|         throw libc_exception(errno, string("open ") + rps);
 | |
|       add_metric ("scanned_bytes_total","source","file",
 | |
|                   st.st_size);
 | |
|       inc_metric ("scanned_files_total","source","file");
 | |
|     }
 | |
|   // NB: we catch exceptions here too, so that we can
 | |
|   // cache the corrupt-elf case (!executable_p &&
 | |
|   // !debuginfo_p) just below, just as if we had an
 | |
|   // EPERM error from open(2).
 | |
|   catch (const reportable_exception& e)
 | |
|     {
 | |
|       e.report(clog);
 | |
|     }
 | |
| 
 | |
|   if (fd >= 0)
 | |
|     close (fd);
 | |
| 
 | |
|   // register this file name in the interning table
 | |
|   ps_upsert_files
 | |
|     .reset()
 | |
|     .bind(1, rps)
 | |
|     .step_ok_done();
 | |
| 
 | |
|   if (buildid == "")
 | |
|     {
 | |
|       // no point storing an elf file without buildid
 | |
|       executable_p = false;
 | |
|       debuginfo_p = false;
 | |
|     }
 | |
|   else
 | |
|     {
 | |
|       // register this build-id in the interning table
 | |
|       ps_upsert_buildids
 | |
|         .reset()
 | |
|         .bind(1, buildid)
 | |
|         .step_ok_done();
 | |
|     }
 | |
| 
 | |
|   if (executable_p)
 | |
|     fts_executable ++;
 | |
|   if (debuginfo_p)
 | |
|     fts_debuginfo ++;
 | |
|   if (executable_p || debuginfo_p)
 | |
|     {
 | |
|       ps_upsert_de
 | |
|         .reset()
 | |
|         .bind(1, buildid)
 | |
|         .bind(2, debuginfo_p ? 1 : 0)
 | |
|         .bind(3, executable_p ? 1 : 0)
 | |
|         .bind(4, rps)
 | |
|         .bind(5, st.st_mtime)
 | |
|         .step_ok_done();
 | |
|     }
 | |
|   if (executable_p)
 | |
|     inc_metric("found_executable_total","source","files");
 | |
|   if (debuginfo_p)
 | |
|     inc_metric("found_debuginfo_total","source","files");
 | |
| 
 | |
|   if (sourcefiles.size() && buildid != "")
 | |
|     {
 | |
|       fts_sourcefiles += sourcefiles.size();
 | |
| 
 | |
|       for (auto&& dwarfsrc : sourcefiles)
 | |
|         {
 | |
|           char *srp = realpath(dwarfsrc.c_str(), NULL);
 | |
|           if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
 | |
|             continue; // unresolvable files are not a serious problem
 | |
|           // throw libc_exception(errno, "fts/file realpath " + srcpath);
 | |
|           string srps = string(srp);
 | |
|           free (srp);
 | |
| 
 | |
|           struct stat sfs;
 | |
|           rc = stat(srps.c_str(), &sfs);
 | |
|           if (rc != 0)
 | |
|             continue;
 | |
| 
 | |
|           if (verbose > 2)
 | |
|             obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
 | |
|                            << " mtime=" << sfs.st_mtime
 | |
|                            << " as source " << dwarfsrc << endl;
 | |
| 
 | |
|           ps_upsert_files
 | |
|             .reset()
 | |
|             .bind(1, srps)
 | |
|             .step_ok_done();
 | |
| 
 | |
|           // PR25548: store canonicalized dwarfsrc path
 | |
|           string dwarfsrc_canon = canon_pathname (dwarfsrc);
 | |
|           if (dwarfsrc_canon != dwarfsrc)
 | |
|             {
 | |
|               if (verbose > 3)
 | |
|                 obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
 | |
|             }
 | |
| 
 | |
|           ps_upsert_files
 | |
|             .reset()
 | |
|             .bind(1, dwarfsrc_canon)
 | |
|             .step_ok_done();
 | |
| 
 | |
|           ps_upsert_s
 | |
|             .reset()
 | |
|             .bind(1, buildid)
 | |
|             .bind(2, dwarfsrc_canon)
 | |
|             .bind(3, srps)
 | |
|             .bind(4, sfs.st_mtime)
 | |
|             .step_ok_done();
 | |
| 
 | |
|           inc_metric("found_sourcerefs_total","source","files");
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   ps_scan_done
 | |
|     .reset()
 | |
|     .bind(1, rps)
 | |
|     .bind(2, st.st_mtime)
 | |
|     .bind(3, st.st_size)
 | |
|     .step_ok_done();
 | |
| 
 | |
|   if (verbose > 2)
 | |
|     obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
 | |
|                    << " mtime=" << st.st_mtime << " atype="
 | |
|                    << (executable_p ? "E" : "")
 | |
|                    << (debuginfo_p ? "D" : "") << endl;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
 | |
| // constituent files with given upsert statements.
 | |
| static void
 | |
| archive_classify (const string& rps, string& archive_extension,
 | |
|                   sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files,
 | |
|                   sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
 | |
|                   time_t mtime,
 | |
|                   unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
 | |
|                   bool& fts_sref_complete_p)
 | |
| {
 | |
|   string archive_decoder = "/dev/null";
 | |
|   for (auto&& arch : scan_archives)
 | |
|     if (string_endswith(rps, arch.first))
 | |
|       {
 | |
|         archive_extension = arch.first;
 | |
|         archive_decoder = arch.second;
 | |
|       }
 | |
| 
 | |
|   FILE* fp;
 | |
|   defer_dtor<FILE*,int>::dtor_fn dfn;
 | |
|   if (archive_decoder != "cat")
 | |
|     {
 | |
|       string popen_cmd = archive_decoder + " " + shell_escape(rps);
 | |
|       fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
 | |
|       dfn = pclose;
 | |
|       if (fp == NULL)
 | |
|         throw libc_exception (errno, string("popen ") + popen_cmd);
 | |
|     }
 | |
|   else
 | |
|     {
 | |
|       fp = fopen (rps.c_str(), "r");
 | |
|       dfn = fclose;
 | |
|       if (fp == NULL)
 | |
|         throw libc_exception (errno, string("fopen ") + rps);
 | |
|     }
 | |
|   defer_dtor<FILE*,int> fp_closer (fp, dfn);
 | |
| 
 | |
|   struct archive *a;
 | |
|   a = archive_read_new();
 | |
|   if (a == NULL)
 | |
|     throw archive_exception("cannot create archive reader");
 | |
|   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
 | |
| 
 | |
|   int rc = archive_read_support_format_all(a);
 | |
|   if (rc != ARCHIVE_OK)
 | |
|     throw archive_exception(a, "cannot select all formats");
 | |
|   rc = archive_read_support_filter_all(a);
 | |
|   if (rc != ARCHIVE_OK)
 | |
|     throw archive_exception(a, "cannot select all filters");
 | |
| 
 | |
|   rc = archive_read_open_FILE (a, fp);
 | |
|   if (rc != ARCHIVE_OK)
 | |
|     throw archive_exception(a, "cannot open archive from pipe");
 | |
| 
 | |
|   if (verbose > 3)
 | |
|     obatched(clog) << "libarchive scanning " << rps << endl;
 | |
| 
 | |
|   while(1) // parse archive entries
 | |
|     {
 | |
|     if (interrupted)
 | |
|       break;
 | |
| 
 | |
|     try
 | |
|         {
 | |
|           struct archive_entry *e;
 | |
|           rc = archive_read_next_header (a, &e);
 | |
|           if (rc != ARCHIVE_OK)
 | |
|             break;
 | |
| 
 | |
|           if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
 | |
|             continue;
 | |
| 
 | |
|           string fn = canonicalized_archive_entry_pathname (e);
 | |
| 
 | |
|           if (verbose > 3)
 | |
|             obatched(clog) << "libarchive checking " << fn << endl;
 | |
| 
 | |
|           // extract this file to a temporary file
 | |
|           char* tmppath = NULL;
 | |
|           rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
 | |
|           if (rc < 0)
 | |
|             throw libc_exception (ENOMEM, "cannot allocate tmppath");
 | |
|           defer_dtor<void*,void> tmmpath_freer (tmppath, free);
 | |
|           int fd = mkstemp (tmppath);
 | |
|           if (fd < 0)
 | |
|             throw libc_exception (errno, "cannot create temporary file");
 | |
|           unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
 | |
|           defer_dtor<int,int> minifd_closer (fd, close);
 | |
| 
 | |
|           rc = archive_read_data_into_fd (a, fd);
 | |
|           if (rc != ARCHIVE_OK)
 | |
|             throw archive_exception(a, "cannot extract file");
 | |
| 
 | |
|           // finally ... time to run elf_classify on this bad boy and update the database
 | |
|           bool executable_p = false, debuginfo_p = false;
 | |
|           string buildid;
 | |
|           set<string> sourcefiles;
 | |
|           elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
 | |
|           // NB: might throw
 | |
| 
 | |
|           if (buildid != "") // intern buildid
 | |
|             {
 | |
|               ps_upsert_buildids
 | |
|                 .reset()
 | |
|                 .bind(1, buildid)
 | |
|                 .step_ok_done();
 | |
|             }
 | |
| 
 | |
|           ps_upsert_files // register this rpm constituent file name in interning table
 | |
|             .reset()
 | |
|             .bind(1, fn)
 | |
|             .step_ok_done();
 | |
| 
 | |
|           if (sourcefiles.size() > 0) // sref records needed
 | |
|             {
 | |
|               // NB: we intern each source file once.  Once raw, as it
 | |
|               // appears in the DWARF file list coming back from
 | |
|               // elf_classify() - because it'll end up in the
 | |
|               // _norm.artifactsrc column.  We don't also put another
 | |
|               // version with a '.' at the front, even though that's
 | |
|               // how rpm/cpio packs names, because we hide that from
 | |
|               // the database for storage efficiency.
 | |
| 
 | |
|               for (auto&& s : sourcefiles)
 | |
|                 {
 | |
|                   if (s == "")
 | |
|                     {
 | |
|                       fts_sref_complete_p = false;
 | |
|                       continue;
 | |
|                     }
 | |
| 
 | |
|                   // PR25548: store canonicalized source path
 | |
|                   const string& dwarfsrc = s;
 | |
|                   string dwarfsrc_canon = canon_pathname (dwarfsrc);
 | |
|                   if (dwarfsrc_canon != dwarfsrc)
 | |
|                     {
 | |
|                       if (verbose > 3)
 | |
|                         obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
 | |
|                     }
 | |
| 
 | |
|                   ps_upsert_files
 | |
|                     .reset()
 | |
|                     .bind(1, dwarfsrc_canon)
 | |
|                     .step_ok_done();
 | |
| 
 | |
|                   ps_upsert_sref
 | |
|                     .reset()
 | |
|                     .bind(1, buildid)
 | |
|                     .bind(2, dwarfsrc_canon)
 | |
|                     .step_ok_done();
 | |
| 
 | |
|                   fts_sref ++;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|           if (executable_p)
 | |
|             fts_executable ++;
 | |
|           if (debuginfo_p)
 | |
|             fts_debuginfo ++;
 | |
| 
 | |
|           if (executable_p || debuginfo_p)
 | |
|             {
 | |
|               ps_upsert_de
 | |
|                 .reset()
 | |
|                 .bind(1, buildid)
 | |
|                 .bind(2, debuginfo_p ? 1 : 0)
 | |
|                 .bind(3, executable_p ? 1 : 0)
 | |
|                 .bind(4, rps)
 | |
|                 .bind(5, mtime)
 | |
|                 .bind(6, fn)
 | |
|                 .step_ok_done();
 | |
|             }
 | |
|           else // potential source - sdef record
 | |
|             {
 | |
|               fts_sdef ++;
 | |
|               ps_upsert_sdef
 | |
|                 .reset()
 | |
|                 .bind(1, rps)
 | |
|                 .bind(2, mtime)
 | |
|                 .bind(3, fn)
 | |
|                 .step_ok_done();
 | |
|             }
 | |
| 
 | |
|           if ((verbose > 2) && (executable_p || debuginfo_p))
 | |
|             obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
 | |
|                            << " mtime=" << mtime << " atype="
 | |
|                            << (executable_p ? "E" : "")
 | |
|                            << (debuginfo_p ? "D" : "")
 | |
|                            << " sourcefiles=" << sourcefiles.size() << endl;
 | |
| 
 | |
|         }
 | |
|       catch (const reportable_exception& e)
 | |
|         {
 | |
|           e.report(clog);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // scan for archive files such as .rpm
 | |
| static void
 | |
| scan_archive_file (const string& rps, const stat_t& st,
 | |
|                    sqlite_ps& ps_upsert_buildids,
 | |
|                    sqlite_ps& ps_upsert_files,
 | |
|                    sqlite_ps& ps_upsert_de,
 | |
|                    sqlite_ps& ps_upsert_sref,
 | |
|                    sqlite_ps& ps_upsert_sdef,
 | |
|                    sqlite_ps& ps_query,
 | |
|                    sqlite_ps& ps_scan_done,
 | |
|                    unsigned& fts_cached,
 | |
|                    unsigned& fts_executable,
 | |
|                    unsigned& fts_debuginfo,
 | |
|                    unsigned& fts_sref,
 | |
|                    unsigned& fts_sdef)
 | |
| {
 | |
|   /* See if we know of it already. */
 | |
|   int rc = ps_query
 | |
|     .reset()
 | |
|     .bind(1, rps)
 | |
|     .bind(2, st.st_mtime)
 | |
|     .step();
 | |
|   ps_query.reset();
 | |
|   if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
 | |
|     // no need to recheck a file/version we already know
 | |
|     // specifically, no need to parse this archive again, since we already have
 | |
|     // it as a D or E or S record,
 | |
|     // (so is stored with buildid=NULL)
 | |
|     {
 | |
|       fts_cached ++;
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|   // intern the archive file name
 | |
|   ps_upsert_files
 | |
|     .reset()
 | |
|     .bind(1, rps)
 | |
|     .step_ok_done();
 | |
| 
 | |
|   // extract the archive contents
 | |
|   unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
 | |
|   bool my_fts_sref_complete_p = true;
 | |
|   try
 | |
|     {
 | |
|       string archive_extension;
 | |
|       archive_classify (rps, archive_extension,
 | |
|                         ps_upsert_buildids, ps_upsert_files,
 | |
|                         ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
 | |
|                         st.st_mtime,
 | |
|                         my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
 | |
|                         my_fts_sref_complete_p);
 | |
|       add_metric ("scanned_bytes_total","source",archive_extension + " archive",
 | |
|                   st.st_size);
 | |
|       inc_metric ("scanned_files_total","source",archive_extension + " archive");
 | |
|       add_metric("found_debuginfo_total","source",archive_extension + " archive",
 | |
|                  my_fts_debuginfo);
 | |
|       add_metric("found_executable_total","source",archive_extension + " archive",
 | |
|                  my_fts_executable);
 | |
|       add_metric("found_sourcerefs_total","source",archive_extension + " archive",
 | |
|                  my_fts_sref);
 | |
|     }
 | |
|   catch (const reportable_exception& e)
 | |
|     {
 | |
|       e.report(clog);
 | |
|     }
 | |
| 
 | |
|   if (verbose > 2)
 | |
|     obatched(clog) << "scanned archive=" << rps
 | |
|                    << " mtime=" << st.st_mtime
 | |
|                    << " executables=" << my_fts_executable
 | |
|                    << " debuginfos=" << my_fts_debuginfo
 | |
|                    << " srefs=" << my_fts_sref
 | |
|                    << " sdefs=" << my_fts_sdef
 | |
|                    << endl;
 | |
| 
 | |
|   fts_executable += my_fts_executable;
 | |
|   fts_debuginfo += my_fts_debuginfo;
 | |
|   fts_sref += my_fts_sref;
 | |
|   fts_sdef += my_fts_sdef;
 | |
| 
 | |
|   if (my_fts_sref_complete_p) // leave incomplete?
 | |
|     ps_scan_done
 | |
|       .reset()
 | |
|       .bind(1, rps)
 | |
|       .bind(2, st.st_mtime)
 | |
|       .bind(3, st.st_size)
 | |
|       .step_ok_done();
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 
 | |
| // The thread that consumes file names off of the scanq.  We hold
 | |
| // the persistent sqlite_ps's at this level and delegate file/archive
 | |
| // scanning to other functions.
 | |
| static void*
 | |
| thread_main_scanner (void* arg)
 | |
| {
 | |
|   (void) arg;
 | |
| 
 | |
|   // all the prepared statements fit to use, the _f_ set:
 | |
|   sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
 | |
|   sqlite_ps ps_f_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
 | |
|   sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
 | |
|                           "insert or ignore into " BUILDIDS "_f_de "
 | |
|                           "(buildid, debuginfo_p, executable_p, file, mtime) "
 | |
|                           "values ((select id from " BUILDIDS "_buildids where hex = ?),"
 | |
|                           "        ?,?,"
 | |
|                           "        (select id from " BUILDIDS "_files where name = ?), ?);");
 | |
|   sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
 | |
|                          "insert or ignore into " BUILDIDS "_f_s "
 | |
|                          "(buildid, artifactsrc, file, mtime) "
 | |
|                          "values ((select id from " BUILDIDS "_buildids where hex = ?),"
 | |
|                          "        (select id from " BUILDIDS "_files where name = ?),"
 | |
|                          "        (select id from " BUILDIDS "_files where name = ?),"
 | |
|                          "        ?);");
 | |
|   sqlite_ps ps_f_query (db, "file-negativehit-find",
 | |
|                         "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
 | |
|                         "and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
 | |
|   sqlite_ps ps_f_scan_done (db, "file-scanned",
 | |
|                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
 | |
|                           "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
 | |
| 
 | |
|   // and now for the _r_ set
 | |
|   sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
 | |
|   sqlite_ps ps_r_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
 | |
|   sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
 | |
|                           "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
 | |
|                           "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, "
 | |
|                           "(select id from " BUILDIDS "_files where name = ?), ?, "
 | |
|                           "(select id from " BUILDIDS "_files where name = ?));");
 | |
|   sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
 | |
|                             "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
 | |
|                             "(select id from " BUILDIDS "_buildids where hex = ?), "
 | |
|                             "(select id from " BUILDIDS "_files where name = ?));");
 | |
|   sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
 | |
|                             "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
 | |
|                             "(select id from " BUILDIDS "_files where name = ?), ?,"
 | |
|                             "(select id from " BUILDIDS "_files where name = ?));");
 | |
|   sqlite_ps ps_r_query (db, "rpm-negativehit-query",
 | |
|                       "select 1 from " BUILDIDS "_file_mtime_scanned where "
 | |
|                       "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
 | |
|   sqlite_ps ps_r_scan_done (db, "rpm-scanned",
 | |
|                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
 | |
|                           "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
 | |
| 
 | |
| 
 | |
|   unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
 | |
|   unsigned fts_sref = 0, fts_sdef = 0;
 | |
| 
 | |
|   add_metric("thread_count", "role", "scan", 1);
 | |
|   add_metric("thread_busy", "role", "scan", 1);
 | |
|   while (! interrupted)
 | |
|     {
 | |
|       scan_payload p;
 | |
| 
 | |
|       add_metric("thread_busy", "role", "scan", -1);
 | |
|       bool gotone = scanq.wait_front(p);
 | |
|       add_metric("thread_busy", "role", "scan", 1);
 | |
| 
 | |
|       if (! gotone) continue; // go back to waiting
 | |
| 
 | |
|       try
 | |
|         {
 | |
|           bool scan_archive = false;
 | |
|           for (auto&& arch : scan_archives)
 | |
|             if (string_endswith(p.first, arch.first))
 | |
|               scan_archive = true;
 | |
| 
 | |
|           if (scan_archive)
 | |
|             scan_archive_file (p.first, p.second,
 | |
|                                ps_r_upsert_buildids,
 | |
|                                ps_r_upsert_files,
 | |
|                                ps_r_upsert_de,
 | |
|                                ps_r_upsert_sref,
 | |
|                                ps_r_upsert_sdef,
 | |
|                                ps_r_query,
 | |
|                                ps_r_scan_done,
 | |
|                                fts_cached,
 | |
|                                fts_executable,
 | |
|                                fts_debuginfo,
 | |
|                                fts_sref,
 | |
|                                fts_sdef);
 | |
| 
 | |
|           if (scan_files) // NB: maybe "else if" ?
 | |
|             scan_source_file (p.first, p.second,
 | |
|                               ps_f_upsert_buildids,
 | |
|                               ps_f_upsert_files,
 | |
|                               ps_f_upsert_de,
 | |
|                               ps_f_upsert_s,
 | |
|                               ps_f_query,
 | |
|                               ps_f_scan_done,
 | |
|                               fts_cached, fts_executable, fts_debuginfo, fts_sourcefiles);
 | |
|         }
 | |
|       catch (const reportable_exception& e)
 | |
|         {
 | |
|           e.report(cerr);
 | |
|         }
 | |
| 
 | |
|       scanq.done_front(); // let idlers run
 | |
|       
 | |
|       if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
 | |
|         {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
 | |
|       (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
 | |
|       (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage
 | |
| 
 | |
|       // finished a scanning step -- not a "loop", because we just
 | |
|       // consume the traversal loop's work, whenever
 | |
|       inc_metric("thread_work_total","role","scan");
 | |
|     }
 | |
| 
 | |
| 
 | |
|   add_metric("thread_busy", "role", "scan", -1);
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // The thread that traverses all the source_paths and enqueues all the
 | |
| // matching files into the file/archive scan queue.
 | |
| static void
 | |
| scan_source_paths()
 | |
| {
 | |
|   // NB: fedora 31 glibc/fts(3) crashes inside fts_read() on empty
 | |
|   // path list.
 | |
|   if (source_paths.empty())
 | |
|     return;
 | |
| 
 | |
|   // Turn the source_paths into an fts(3)-compatible char**.  Since
 | |
|   // source_paths[] does not change after argv processing, the
 | |
|   // c_str()'s are safe to keep around awile.
 | |
|   vector<const char *> sps;
 | |
|   for (auto&& sp: source_paths)
 | |
|     sps.push_back(sp.c_str());
 | |
|   sps.push_back(NULL);
 | |
| 
 | |
|   FTS *fts = fts_open ((char * const *)sps.data(),
 | |
|                       (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
 | |
|                       | FTS_NOCHDIR /* multithreaded */,
 | |
|                       NULL);
 | |
|   if (fts == NULL)
 | |
|     throw libc_exception(errno, "cannot fts_open");
 | |
|   defer_dtor<FTS*,int> fts_cleanup (fts, fts_close);
 | |
| 
 | |
|   struct timespec ts_start, ts_end;
 | |
|   clock_gettime (CLOCK_MONOTONIC, &ts_start);
 | |
|   unsigned fts_scanned = 0, fts_regex = 0;
 | |
| 
 | |
|   FTSENT *f;
 | |
|   while ((f = fts_read (fts)) != NULL)
 | |
|   {
 | |
|     if (interrupted) break;
 | |
| 
 | |
|     if (sigusr2 != forced_groom_count) // stop early if groom triggered
 | |
|       {
 | |
|         scanq.clear(); // clear previously issued work for scanner threads
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|     fts_scanned ++;
 | |
| 
 | |
|     if (verbose > 2)
 | |
|       obatched(clog) << "fts traversing " << f->fts_path << endl;
 | |
| 
 | |
|     switch (f->fts_info)
 | |
|       {
 | |
|       case FTS_F:
 | |
|         {
 | |
|           /* Found a file.  Convert it to an absolute path, so
 | |
|              the buildid database does not have relative path
 | |
|              names that are unresolvable from a subsequent run
 | |
|              in a different cwd. */
 | |
|           char *rp = realpath(f->fts_path, NULL);
 | |
|           if (rp == NULL)
 | |
|             continue; // ignore dangling symlink or such
 | |
|           string rps = string(rp);
 | |
|           free (rp);
 | |
| 
 | |
|           bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
 | |
|           bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
 | |
|           if (!ri || rx)
 | |
|             {
 | |
|               if (verbose > 3)
 | |
|                 obatched(clog) << "fts skipped by regex "
 | |
|                                << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
 | |
|               fts_regex ++;
 | |
|               if (!ri)
 | |
|                 inc_metric("traversed_total","type","file-skipped-I");
 | |
|               if (rx)
 | |
|                 inc_metric("traversed_total","type","file-skipped-X");
 | |
|             }
 | |
|           else
 | |
|             {
 | |
|               scanq.push_back (make_pair(rps, *f->fts_statp));
 | |
|               inc_metric("traversed_total","type","file");
 | |
|             }
 | |
|         }
 | |
|         break;
 | |
| 
 | |
|       case FTS_ERR:
 | |
|       case FTS_NS:
 | |
|         // report on some types of errors because they may reflect fixable misconfiguration
 | |
|         {
 | |
|           auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
 | |
|           x.report(cerr);
 | |
|         }
 | |
|         inc_metric("traversed_total","type","error");
 | |
|         break;
 | |
| 
 | |
|       case FTS_SL: // ignore, but count because debuginfod -L would traverse these
 | |
|         inc_metric("traversed_total","type","symlink");
 | |
|         break;
 | |
| 
 | |
|       case FTS_D: // ignore
 | |
|         inc_metric("traversed_total","type","directory");
 | |
|         break;
 | |
| 
 | |
|       default: // ignore
 | |
|         inc_metric("traversed_total","type","other");
 | |
|         break;
 | |
|       }
 | |
|   }
 | |
|   clock_gettime (CLOCK_MONOTONIC, &ts_end);
 | |
|   double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
 | |
| 
 | |
|   obatched(clog) << "fts traversed source paths in " << deltas << "s, scanned=" << fts_scanned
 | |
|                  << ", regex-skipped=" << fts_regex << endl;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void*
 | |
| thread_main_fts_source_paths (void* arg)
 | |
| {
 | |
|   (void) arg; // ignore; we operate on global data
 | |
| 
 | |
|   set_metric("thread_tid", "role","traverse", tid());
 | |
|   add_metric("thread_count", "role", "traverse", 1);
 | |
| 
 | |
|   time_t last_rescan = 0;
 | |
| 
 | |
|   while (! interrupted)
 | |
|     {
 | |
|       sleep (1);
 | |
|       scanq.wait_idle(); // don't start a new traversal while scanners haven't finished the job
 | |
|       scanq.done_idle(); // release the hounds
 | |
|       if (interrupted) break;
 | |
| 
 | |
|       time_t now = time(NULL);
 | |
|       bool rescan_now = false;
 | |
|       if (last_rescan == 0) // at least one initial rescan is documented even for -t0
 | |
|         rescan_now = true;
 | |
|       if (rescan_s > 0 && (long)now > (long)(last_rescan + rescan_s))
 | |
|         rescan_now = true;
 | |
|       if (sigusr1 != forced_rescan_count)
 | |
|         {
 | |
|           forced_rescan_count = sigusr1;
 | |
|           rescan_now = true;
 | |
|         }
 | |
|       if (rescan_now)
 | |
|         {
 | |
|           set_metric("thread_busy", "role","traverse", 1);
 | |
|           try
 | |
|             {
 | |
|               scan_source_paths();
 | |
|             }
 | |
|           catch (const reportable_exception& e)
 | |
|             {
 | |
|               e.report(cerr);
 | |
|             }
 | |
|           last_rescan = time(NULL); // NB: now was before scanning
 | |
|           // finished a traversal loop
 | |
|           inc_metric("thread_work_total", "role","traverse");
 | |
|           set_metric("thread_busy", "role","traverse", 0);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| static void
 | |
| database_stats_report()
 | |
| {
 | |
|   sqlite_ps ps_query (db, "database-overview",
 | |
|                       "select label,quantity from " BUILDIDS "_stats");
 | |
| 
 | |
|   obatched(clog) << "database record counts:" << endl;
 | |
|   while (1)
 | |
|     {
 | |
|       if (interrupted) break;
 | |
|       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
 | |
|         break;
 | |
| 
 | |
|       int rc = ps_query.step();
 | |
|       if (rc == SQLITE_DONE) break;
 | |
|       if (rc != SQLITE_ROW)
 | |
|         throw sqlite_exception(rc, "step");
 | |
| 
 | |
|       obatched(clog)
 | |
|         << right << setw(20) << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
 | |
|         << " "
 | |
|         << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
 | |
|         << endl;
 | |
| 
 | |
|       set_metric("groom", "statistic",
 | |
|                  ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
 | |
|                  (sqlite3_column_double(ps_query, 1)));
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Do a round of database grooming that might take many minutes to run.
 | |
| void groom()
 | |
| {
 | |
|   obatched(clog) << "grooming database" << endl;
 | |
| 
 | |
|   struct timespec ts_start, ts_end;
 | |
|   clock_gettime (CLOCK_MONOTONIC, &ts_start);
 | |
| 
 | |
|   // scan for files that have disappeared
 | |
|   sqlite_ps files (db, "check old files",
 | |
|                    "select distinct s.mtime, s.file, f.name from "
 | |
|                    BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
 | |
|                    "where f.id = s.file");
 | |
|   // NB: Because _ftime_mtime_scanned can contain both F and
 | |
|   // R records for the same file, this query would return duplicates if the
 | |
|   // DISTINCT qualifier were not there.
 | |
|   files.reset();
 | |
| 
 | |
|   // DECISION TIME - we enumerate stale fileids/mtimes
 | |
|   deque<pair<int64_t,int64_t> > stale_fileid_mtime;
 | |
|   
 | |
|   time_t time_start = time(NULL);
 | |
|   while(1)
 | |
|     {
 | |
|       // PR28514: limit grooming iteration to O(rescan time), to avoid
 | |
|       // slow filesystem tests over many files locking out rescans for
 | |
|       // too long.
 | |
|       if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
 | |
|         {
 | |
|           inc_metric("groomed_total", "decision", "aborted");
 | |
|           break;
 | |
|         }
 | |
| 
 | |
|       if (interrupted) break;
 | |
| 
 | |
|       int rc = files.step();
 | |
|       if (rc != SQLITE_ROW)
 | |
|         break;
 | |
| 
 | |
|       int64_t mtime = sqlite3_column_int64 (files, 0);
 | |
|       int64_t fileid = sqlite3_column_int64 (files, 1);
 | |
|       const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
 | |
|       struct stat s;
 | |
|       bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
 | |
|       bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
 | |
| 
 | |
|       rc = stat(filename, &s);
 | |
|       if ( (regex_groom && reg_exclude && !reg_include) ||  rc < 0 || (mtime != (int64_t) s.st_mtime) )
 | |
|         {
 | |
|           if (verbose > 2)
 | |
|             obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
 | |
|           stale_fileid_mtime.push_back(make_pair(fileid,mtime));
 | |
|           inc_metric("groomed_total", "decision", "stale");
 | |
|           set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
 | |
|         }
 | |
|       else
 | |
|         inc_metric("groomed_total", "decision", "fresh");
 | |
|       
 | |
|       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
 | |
|         break;
 | |
|     }
 | |
|   files.reset();
 | |
| 
 | |
|   // ACTION TIME
 | |
| 
 | |
|   // Now that we know which file/mtime tuples are stale, actually do
 | |
|   // the deletion from the database.  Doing this during the SELECT
 | |
|   // iteration above results in undefined behaviour in sqlite, as per
 | |
|   // https://www.sqlite.org/isolation.html
 | |
| 
 | |
|   // We could shuffle stale_fileid_mtime[] here.  It'd let aborted
 | |
|   // sequences of nuke operations resume at random locations, instead
 | |
|   // of just starting over.  But it doesn't matter much either way,
 | |
|   // as long as we make progress.
 | |
| 
 | |
|   sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
 | |
|   sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
 | |
|   sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
 | |
|                             "where file = ? and mtime = ?");
 | |
| 
 | |
|   while (! stale_fileid_mtime.empty())
 | |
|     {
 | |
|       auto stale = stale_fileid_mtime.front();
 | |
|       stale_fileid_mtime.pop_front();
 | |
|       set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
 | |
| 
 | |
|       // PR28514: limit grooming iteration to O(rescan time), to avoid
 | |
|       // slow nuke_* queries over many files locking out rescans for too
 | |
|       // long.  We iterate over the files in random() sequence to avoid
 | |
|       // partial checks going over the same set.
 | |
|       if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
 | |
|         {
 | |
|           inc_metric("groomed_total", "action", "aborted");
 | |
|           break;
 | |
|         }
 | |
| 
 | |
|       if (interrupted) break;
 | |
| 
 | |
|       int64_t fileid = stale.first;
 | |
|       int64_t mtime = stale.second;
 | |
|       files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
 | |
|       files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
 | |
|       files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
 | |
|       inc_metric("groomed_total", "action", "cleaned");
 | |
|       
 | |
|        if (sigusr1 != forced_rescan_count) // stop early if scan triggered
 | |
|         break;
 | |
|     }
 | |
|   stale_fileid_mtime.clear(); // no need for this any longer
 | |
|   set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
 | |
|       
 | |
|   // delete buildids with no references in _r_de or _f_de tables;
 | |
|   // cascades to _r_sref & _f_s records
 | |
|   sqlite_ps buildids_del (db, "nuke orphan buildids",
 | |
|                           "delete from " BUILDIDS "_buildids "
 | |
|                           "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
 | |
|                           "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
 | |
|   buildids_del.reset().step_ok_done();
 | |
| 
 | |
|   if (interrupted) return;
 | |
| 
 | |
|   // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
 | |
|   sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
 | |
|   g1.reset().step_ok_done();
 | |
|   sqlite_ps g2 (db, "optimize", "pragma optimize");
 | |
|   g2.reset().step_ok_done();
 | |
|   sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
 | |
|   g3.reset().step_ok_done();
 | |
| 
 | |
|   database_stats_report();
 | |
| 
 | |
|   (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
 | |
| 
 | |
|   sqlite3_db_release_memory(db); // shrink the process if possible
 | |
|   sqlite3_db_release_memory(dbq); // ... for both connections
 | |
|   debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
 | |
| 
 | |
|   fdcache.limit(0,0,0,0); // release the fdcache contents
 | |
|   fdcache.limit(fdcache_fds, fdcache_mbs, fdcache_prefetch_fds, fdcache_prefetch_mbs); // restore status quo parameters
 | |
| 
 | |
|   clock_gettime (CLOCK_MONOTONIC, &ts_end);
 | |
|   double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
 | |
| 
 | |
|   obatched(clog) << "groomed database in " << deltas << "s" << endl;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void*
 | |
| thread_main_groom (void* /*arg*/)
 | |
| {
 | |
|   set_metric("thread_tid", "role", "groom", tid());
 | |
|   add_metric("thread_count", "role", "groom", 1);
 | |
| 
 | |
|   time_t last_groom = 0;
 | |
| 
 | |
|   while (1)
 | |
|     {
 | |
|       sleep (1);
 | |
|       scanq.wait_idle(); // PR25394: block scanners during grooming!
 | |
|       if (interrupted) break;
 | |
| 
 | |
|       time_t now = time(NULL);
 | |
|       bool groom_now = false;
 | |
|       if (last_groom == 0) // at least one initial groom is documented even for -g0
 | |
|         groom_now = true;
 | |
|       if (groom_s > 0 && (long)now > (long)(last_groom + groom_s))
 | |
|         groom_now = true;
 | |
|       if (sigusr2 != forced_groom_count)
 | |
|         {
 | |
|           forced_groom_count = sigusr2;
 | |
|           groom_now = true;
 | |
|         }
 | |
|       if (groom_now)
 | |
|         {
 | |
|           set_metric("thread_busy", "role", "groom", 1);
 | |
|           try
 | |
|             {
 | |
|               groom ();
 | |
|             }
 | |
|           catch (const sqlite_exception& e)
 | |
|             {
 | |
|               obatched(cerr) << e.message << endl;
 | |
|             }
 | |
|           last_groom = time(NULL); // NB: now was before grooming
 | |
|           // finished a grooming loop
 | |
|           inc_metric("thread_work_total", "role", "groom");
 | |
|           set_metric("thread_busy", "role", "groom", 0);
 | |
|         }
 | |
| 
 | |
|       scanq.done_idle();
 | |
|     }
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| static void
 | |
| signal_handler (int /* sig */)
 | |
| {
 | |
|   interrupted ++;
 | |
| 
 | |
|   if (db)
 | |
|     sqlite3_interrupt (db);
 | |
|   if (dbq)
 | |
|     sqlite3_interrupt (dbq);
 | |
| 
 | |
|   // NB: don't do anything else in here
 | |
| }
 | |
| 
 | |
| static void
 | |
| sigusr1_handler (int /* sig */)
 | |
| {
 | |
|    sigusr1 ++;
 | |
|   // NB: don't do anything else in here
 | |
| }
 | |
| 
 | |
| static void
 | |
| sigusr2_handler (int /* sig */)
 | |
| {
 | |
|    sigusr2 ++;
 | |
|   // NB: don't do anything else in here
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| // A user-defined sqlite function, to score the sharedness of the
 | |
| // prefix of two strings.  This is used to compare candidate debuginfo
 | |
| // / source-rpm names, so that the closest match
 | |
| // (directory-topology-wise closest) is found.  This is important in
 | |
| // case the same sref (source file name) is in many -debuginfo or
 | |
| // -debugsource RPMs, such as when multiple versions/releases of the
 | |
| // same package are in the database.
 | |
| 
 | |
| static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
 | |
| {
 | |
|   if (argc != 2)
 | |
|     sqlite3_result_error(c, "expect 2 string arguments", -1);
 | |
|   else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
 | |
|            (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
 | |
|     sqlite3_result_null(c);
 | |
|   else
 | |
|     {
 | |
|       const unsigned char* a = sqlite3_value_text (argv[0]);
 | |
|       const unsigned char* b = sqlite3_value_text (argv[1]);
 | |
|       int i = 0;
 | |
|       while (*a++ == *b++)
 | |
|         i++;
 | |
|       sqlite3_result_int (c, i);
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| int
 | |
| main (int argc, char *argv[])
 | |
| {
 | |
|   (void) setlocale (LC_ALL, "");
 | |
|   (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
 | |
|   (void) textdomain (PACKAGE_TARNAME);
 | |
| 
 | |
|   /* Tell the library which version we are expecting.  */
 | |
|   elf_version (EV_CURRENT);
 | |
| 
 | |
|   tmpdir = string(getenv("TMPDIR") ?: "/tmp");
 | |
| 
 | |
|   /* Set computed default values. */
 | |
|   db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
 | |
|   int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
 | |
|   if (rc != 0)
 | |
|     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
 | |
|   rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
 | |
|   if (rc != 0)
 | |
|     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
 | |
| 
 | |
|   // default parameters for fdcache are computed from system stats
 | |
|   struct statfs sfs;
 | |
|   rc = statfs(tmpdir.c_str(), &sfs);
 | |
|   if (rc < 0)
 | |
|     fdcache_mbs = 1024; // 1 gigabyte
 | |
|   else
 | |
|     fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
 | |
|   fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full)
 | |
|   fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
 | |
|   fdcache_fds = (concurrency + fdcache_prefetch) * 2;
 | |
| 
 | |
|   /* Parse and process arguments.  */
 | |
|   int remaining;
 | |
|   argp_program_version_hook = print_version; // this works
 | |
|   (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
 | |
|   if (remaining != argc)
 | |
|       error (EXIT_FAILURE, 0,
 | |
|              "unexpected argument: %s", argv[remaining]);
 | |
| 
 | |
|   if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
 | |
|     obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
 | |
| 
 | |
|   fdcache.limit(fdcache_fds, fdcache_mbs, fdcache_prefetch_fds, fdcache_prefetch_mbs);
 | |
| 
 | |
|   (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
 | |
|   (void) signal (SIGINT, signal_handler); // ^C
 | |
|   (void) signal (SIGHUP, signal_handler); // EOF
 | |
|   (void) signal (SIGTERM, signal_handler); // systemd
 | |
|   (void) signal (SIGUSR1, sigusr1_handler); // end-user
 | |
|   (void) signal (SIGUSR2, sigusr2_handler); // end-user
 | |
| 
 | |
|   /* Get database ready. */
 | |
|   if (! passive_p)
 | |
|     {
 | |
|       rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
 | |
|                                                    |SQLITE_OPEN_URI
 | |
|                                                    |SQLITE_OPEN_PRIVATECACHE
 | |
|                                                    |SQLITE_OPEN_CREATE
 | |
|                                                    |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
 | |
|                             NULL);
 | |
|       if (rc == SQLITE_CORRUPT)
 | |
|         {
 | |
|           (void) unlink (db_path.c_str());
 | |
|           error (EXIT_FAILURE, 0,
 | |
|                  "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
 | |
|         }
 | |
|       else if (rc)
 | |
|         {
 | |
|           error (EXIT_FAILURE, 0,
 | |
|                  "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   // open the readonly query variant
 | |
|   // NB: PRIVATECACHE allows web queries to operate in parallel with
 | |
|   // much other grooming/scanning operation.
 | |
|   rc = sqlite3_open_v2 (db_path.c_str(), &dbq, (SQLITE_OPEN_READONLY
 | |
|                                                 |SQLITE_OPEN_URI
 | |
|                                                 |SQLITE_OPEN_PRIVATECACHE
 | |
|                                                 |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
 | |
|                         NULL);
 | |
|   if (rc)
 | |
|     {
 | |
|       error (EXIT_FAILURE, 0,
 | |
|              "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(dbq));
 | |
|     }
 | |
| 
 | |
| 
 | |
|   obatched(clog) << "opened database " << db_path
 | |
|                  << (db?" rw":"") << (dbq?" ro":"") << endl;
 | |
|   obatched(clog) << "sqlite version " << sqlite3_version << endl;
 | |
|   obatched(clog) << "service mode " << (passive_p ? "passive":"active") << endl;
 | |
| 
 | |
|   // add special string-prefix-similarity function used in rpm sref/sdef resolution
 | |
|   rc = sqlite3_create_function(dbq, "sharedprefix", 2, SQLITE_UTF8, NULL,
 | |
|                                & sqlite3_sharedprefix_fn, NULL, NULL);
 | |
|   if (rc != SQLITE_OK)
 | |
|     error (EXIT_FAILURE, 0,
 | |
|            "cannot create sharedprefix function: %s", sqlite3_errmsg(dbq));
 | |
| 
 | |
|   if (! passive_p)
 | |
|     {
 | |
|       if (verbose > 3)
 | |
|         obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
 | |
|       rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
 | |
|       if (rc != SQLITE_OK)
 | |
|         {
 | |
|           error (EXIT_FAILURE, 0,
 | |
|                  "cannot run database schema ddl: %s", sqlite3_errmsg(db));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   // Start httpd server threads.  Separate pool for IPv4 and IPv6, in
 | |
|   // case the host only has one protocol stack.
 | |
|   MHD_Daemon *d4 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
 | |
| #if MHD_VERSION >= 0x00095300
 | |
|                                      | MHD_USE_INTERNAL_POLLING_THREAD
 | |
| #else
 | |
|                                      | MHD_USE_SELECT_INTERNALLY
 | |
| #endif
 | |
|                                      | MHD_USE_DEBUG, /* report errors to stderr */
 | |
|                                      http_port,
 | |
|                                      NULL, NULL, /* default accept policy */
 | |
|                                      handler_cb, NULL, /* handler callback */
 | |
|                                      MHD_OPTION_END);
 | |
|   MHD_Daemon *d6 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
 | |
| #if MHD_VERSION >= 0x00095300
 | |
|                                      | MHD_USE_INTERNAL_POLLING_THREAD
 | |
| #else
 | |
|                                      | MHD_USE_SELECT_INTERNALLY
 | |
| #endif
 | |
|                                      | MHD_USE_IPv6
 | |
|                                      | MHD_USE_DEBUG, /* report errors to stderr */
 | |
|                                      http_port,
 | |
|                                      NULL, NULL, /* default accept policy */
 | |
|                                      handler_cb, NULL, /* handler callback */
 | |
|                                      MHD_OPTION_END);
 | |
| 
 | |
|   if (d4 == NULL && d6 == NULL) // neither ipv4 nor ipv6? boo
 | |
|     {
 | |
|       sqlite3 *database = db;
 | |
|       sqlite3 *databaseq = dbq;
 | |
|       db = dbq = 0; // for signal_handler not to freak
 | |
|       sqlite3_close (databaseq);
 | |
|       sqlite3_close (database);
 | |
|       error (EXIT_FAILURE, 0, "cannot start http server at port %d", http_port);
 | |
|     }
 | |
| 
 | |
|   obatched(clog) << "started http server on "
 | |
|                  << (d4 != NULL ? "IPv4 " : "")
 | |
|                  << (d6 != NULL ? "IPv6 " : "")
 | |
|                  << "port=" << http_port << endl;
 | |
| 
 | |
|   // add maxigroom sql if -G given
 | |
|   if (maxigroom)
 | |
|     {
 | |
|       obatched(clog) << "maxigrooming database, please wait." << endl;
 | |
|       extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
 | |
|       extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
 | |
|       extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
 | |
| 
 | |
|       // NB: we don't maxigroom the _files interning table.  It'd require a temp index on all the
 | |
|       // tables that have file foreign-keys, which is a lot.
 | |
| 
 | |
|       // NB: with =delete, may take up 3x disk space total during vacuum process
 | |
|       //     vs.  =off (only 2x but may corrupt database if program dies mid-vacuum)
 | |
|       //     vs.  =wal (>3x observed, but safe)
 | |
|       extra_ddl.push_back("pragma journal_mode=delete;");
 | |
|       extra_ddl.push_back("vacuum;");
 | |
|       extra_ddl.push_back("pragma journal_mode=wal;");
 | |
|     }
 | |
| 
 | |
|   // run extra -D sql if given
 | |
|   if (! passive_p)
 | |
|     for (auto&& i: extra_ddl)
 | |
|       {
 | |
|         if (verbose > 1)
 | |
|           obatched(clog) << "extra ddl:\n" << i << endl;
 | |
|         rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
 | |
|         if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
 | |
|           error (0, 0,
 | |
|                  "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
 | |
| 
 | |
|         if (maxigroom)
 | |
|           obatched(clog) << "maxigroomed database" << endl;
 | |
|       }
 | |
| 
 | |
|   if (! passive_p)
 | |
|     obatched(clog) << "search concurrency " << concurrency << endl;
 | |
|   if (! passive_p)
 | |
|     obatched(clog) << "rescan time " << rescan_s << endl;
 | |
|   obatched(clog) << "fdcache fds " << fdcache_fds << endl;
 | |
|   obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
 | |
|   obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
 | |
|   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
 | |
|   obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
 | |
|   if (! passive_p)
 | |
|     obatched(clog) << "groom time " << groom_s << endl;
 | |
|   obatched(clog) << "prefetch fds " << fdcache_prefetch_fds << endl;
 | |
|   obatched(clog) << "prefetch mbs " << fdcache_prefetch_mbs << endl;
 | |
|   obatched(clog) << "forwarded ttl limit " << forwarded_ttl_limit << endl;
 | |
| 
 | |
|   if (scan_archives.size()>0)
 | |
|     {
 | |
|       obatched ob(clog);
 | |
|       auto& o = ob << "accepting archive types ";
 | |
|       for (auto&& arch : scan_archives)
 | |
| 	o << arch.first << "(" << arch.second << ") ";
 | |
|       o << endl;
 | |
|     }
 | |
|   const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
 | |
|   if (du && du[0] != '\0') // set to non-empty string?
 | |
|     obatched(clog) << "upstream debuginfod servers: " << du << endl;
 | |
| 
 | |
|   vector<pthread_t> all_threads;
 | |
| 
 | |
|   if (! passive_p)
 | |
|     {
 | |
|       pthread_t pt;
 | |
|       rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
 | |
|       if (rc)
 | |
|         error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n");
 | |
|       else
 | |
|         {
 | |
| #ifdef HAVE_PTHREAD_SETNAME_NP
 | |
|           (void) pthread_setname_np (pt, "groom");
 | |
| #endif
 | |
|           all_threads.push_back(pt);
 | |
|         }
 | |
| 
 | |
|       if (scan_files || scan_archives.size() > 0)
 | |
|         {
 | |
|           rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
 | |
|           if (rc)
 | |
|             error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
 | |
| #ifdef HAVE_PTHREAD_SETNAME_NP
 | |
|           (void) pthread_setname_np (pt, "traverse");
 | |
| #endif
 | |
|           all_threads.push_back(pt);
 | |
| 
 | |
|           for (unsigned i=0; i<concurrency; i++)
 | |
|             {
 | |
|               rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
 | |
|               if (rc)
 | |
|                 error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n");
 | |
| #ifdef HAVE_PTHREAD_SETNAME_NP
 | |
|               (void) pthread_setname_np (pt, "scan");
 | |
| #endif
 | |
|               all_threads.push_back(pt);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|   
 | |
|   /* Trivial main loop! */
 | |
|   set_metric("ready", 1);
 | |
|   while (! interrupted)
 | |
|     pause ();
 | |
|   scanq.nuke(); // wake up any remaining scanq-related threads, let them die
 | |
|   set_metric("ready", 0);
 | |
| 
 | |
|   if (verbose)
 | |
|     obatched(clog) << "stopping" << endl;
 | |
| 
 | |
|   /* Join all our threads. */
 | |
|   for (auto&& it : all_threads)
 | |
|     pthread_join (it, NULL);
 | |
| 
 | |
|   /* Stop all the web service threads. */
 | |
|   if (d4) MHD_stop_daemon (d4);
 | |
|   if (d6) MHD_stop_daemon (d6);
 | |
| 
 | |
|   if (! passive_p)
 | |
|     {
 | |
|       /* With all threads known dead, we can clean up the global resources. */
 | |
|       rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
 | |
|       if (rc != SQLITE_OK)
 | |
|         {
 | |
|           error (0, 0,
 | |
|                  "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|   // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
 | |
|   (void) regfree (& file_include_regex);
 | |
|   (void) regfree (& file_exclude_regex);
 | |
| 
 | |
|   sqlite3 *database = db;
 | |
|   sqlite3 *databaseq = dbq;
 | |
|   db = dbq = 0; // for signal_handler not to freak
 | |
|   (void) sqlite3_close (databaseq);
 | |
|   if (! passive_p)
 | |
|     (void) sqlite3_close (database);
 | |
| 
 | |
|   return 0;
 | |
| }
 |