Initial Commit
94
internal/config/config.go
Normal file
@@ -0,0 +1,94 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package config

import (
	"os"
	"path/filepath"

	"github.com/caarlos0/env/v11"
	"github.com/pelletier/go-toml/v2"
)

type Config struct {
	SearchThreads int    `toml:"searchThreads" env:"SEARCH_THREADS"`
	Repos         []Repo `toml:"repo" envPrefix:"REPO"`
}

type Repo struct {
	Name            string   `toml:"name" env:"NAME"`
	Type            string   `toml:"type" env:"TYPE"`
	BaseURL         string   `toml:"base_url" env:"BASE_URL"`
	Version         string   `toml:"version" env:"VERSION"`
	Repos           []string `toml:"repos" env:"REPOS"`
	Architectures   []string `toml:"arch" env:"ARCHES"`
	RefreshSchedule string   `toml:"refresh_schedule" env:"REFRESH_SCHEDULE"`
}

func Load() (cfg *Config, err error) {
	cfg = &Config{
		SearchThreads: 4,
	}

	if fl, err := os.Open("/etc/distrohop.toml"); err == nil {
		err = toml.NewDecoder(fl).Decode(cfg)
		if err != nil {
			return nil, err
		}
	}

	cfgDir := "/"
	if os.Getenv("RUNNING_IN_DOCKER") != "true" {
		cfgDir, err = os.UserConfigDir()
		if err != nil {
			return nil, err
		}
	}

	if fl, err := os.Open(filepath.Join(cfgDir, "distrohop.toml")); err == nil {
		err = toml.NewDecoder(fl).Decode(cfg)
		if err != nil {
			return nil, err
		}
	}

	err = env.ParseWithOptions(cfg, env.Options{Prefix: "DISTROHOP_"})
	if err != nil {
		return nil, err
	}

	for i, repo := range cfg.Repos {
		if len(repo.Architectures) == 0 {
			repo.Architectures = []string{""}
		}
		if len(repo.Repos) == 0 {
			repo.Repos = []string{""}
		}
		if repo.RefreshSchedule == "" {
			repo.RefreshSchedule = "0 0 * * *"
		}
		cfg.Repos[i] = repo
	}

	return cfg, nil
}
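Load layers its sources in increasing priority: built-in defaults, /etc/distrohop.toml, distrohop.toml in the user config directory (or in / when RUNNING_IN_DOCKER=true), and finally DISTROHOP_-prefixed environment variables parsed by caarlos0/env. A minimal usage sketch (the log output is illustrative, not part of this commit):

package main

import (
	"log"

	"go.elara.ws/distrohop/internal/config"
)

func main() {
	cfg, err := config.Load()
	if err != nil {
		log.Fatalln("failed to load config:", err)
	}
	log.Println("search threads:", cfg.SearchThreads)
	for _, repo := range cfg.Repos {
		// Print each configured repository and its refresh schedule.
		log.Printf("repo %q (%s) refreshes on %q", repo.Name, repo.Type, repo.RefreshSchedule)
	}
}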
114
internal/index/apt.go
Normal file
@@ -0,0 +1,114 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package index

import (
	"bufio"
	"context"
	"errors"
	"io"
	"net/url"
	"strings"

	"github.com/mholt/archives"
	"go.elara.ws/distrohop/internal/tags"
)

type APT struct{}

func (APT) Name() string {
	return "apt"
}

func (APT) IndexURL(baseURL, version, repo, arch string) ([]string, error) {
	indexURL, err := url.JoinPath(baseURL, "dists", version, repo, "Contents-"+arch+".gz")
	if err != nil {
		return nil, err
	}
	// Before Debian Wheezy, the path to Contents indices didn't include $COMP/repo, so we need to try
	// both the new and old URL formats. Ubuntu also still uses the pre-Debian-Wheezy convention.
	deprecatedURL, err := url.JoinPath(baseURL, "dists", version, "Contents-"+arch+".gz")
	if err != nil {
		return nil, err
	}
	return []string{indexURL, deprecatedURL}, nil
}

func (APT) ReadPkgData(r io.Reader, out chan Record) {
	ctx := context.Background()
	format, r, err := archives.Identify(ctx, "", r)
	if err != nil {
		out <- Record{Error: err}
		return
	}

	decomp, ok := format.(archives.Decompressor)
	if !ok {
		out <- Record{Error: errors.New("downloaded index is not a valid compressed file")}
		return
	}

	dr, err := decomp.OpenReader(r)
	if err != nil {
		out <- Record{Error: err}
		return
	}
	defer dr.Close()

	br := bufio.NewReader(dr)
	for {
		line, err := br.ReadString('\n')
		if errors.Is(err, io.EOF) {
			close(out)
			break
		} else if err != nil {
			out <- Record{Error: err}
			return
		}

		lastSpaceIdx := strings.LastIndexByte(line, ' ')
		if lastSpaceIdx == -1 {
			continue
		}

		fpath := "/" + strings.TrimSpace(line[:lastSpaceIdx])
		names := strings.Split(strings.TrimSpace(line[lastSpaceIdx+1:]), ",")
		for _, name := range names {
			slashIdx := strings.LastIndexByte(name, '/')
			if slashIdx != -1 {
				name = name[slashIdx+1:]
			}

			if strings.Contains(fpath, "changelog.Debian") ||
				strings.Contains(fpath, "README.Debian") ||
				strings.Contains(fpath, "NEWS.Debian.gz") {
				continue
			}

			out <- Record{
				Name: name,
				Tags: tags.Generate(fpath),
			}
		}
	}
}
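IndexURL returns both the modern and the pre-Wheezy Contents locations so callers can try them in order. A sketch with an illustrative Debian mirror, suite, and component (none of these values are hard-coded in distrohop):

package main

import (
	"fmt"

	"go.elara.ws/distrohop/internal/index"
)

func main() {
	urls, err := index.APT{}.IndexURL("http://deb.debian.org/debian", "bookworm", "main", "amd64")
	if err != nil {
		panic(err)
	}
	fmt.Println(urls[0]) // .../dists/bookworm/main/Contents-amd64.gz
	fmt.Println(urls[1]) // .../dists/bookworm/Contents-amd64.gz (pre-Wheezy and Ubuntu layout)
}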
154
internal/index/dnf.go
Normal file
@@ -0,0 +1,154 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package index

import (
	"bufio"
	"context"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"github.com/mholt/archives"
	"go.elara.ws/distrohop/internal/tags"
)

type repomd struct {
	Locations []location `xml:"data>location"`
}

type location struct {
	Href string `xml:"href,attr"`
}

func (r repomd) getGzipFile() string {
	for _, loc := range r.Locations {
		if strings.HasSuffix(loc.Href, "filelists.xml.gz") {
			return loc.Href
		}
	}
	return ""
}

type DNF struct{}

func (DNF) Name() string {
	return "dnf"
}

func (DNF) IndexURL(baseURL, version, repo, arch string) ([]string, error) {
	u, err := url.ParseRequestURI(baseURL)
	if err != nil {
		return nil, err
	}
	repomdPath := fmt.Sprintf("/pub/fedora/linux/releases/%s/%s/%s/os/repodata/repomd.xml", version, repo, arch)
	u.Path = repomdPath

	res, err := http.Get(u.String())
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	var data repomd
	err = xml.NewDecoder(res.Body).Decode(&data)
	if err != nil {
		return nil, err
	}

	gzipFile := data.getGzipFile()
	if gzipFile == "" {
		return nil, errors.New("no gzip file found in repomd.xml")
	}

	u.Path = fmt.Sprintf("/pub/fedora/linux/releases/%s/%s/%s/os/%s", version, repo, arch, gzipFile)
	return []string{u.String()}, nil
}

func (DNF) ReadPkgData(r io.Reader, out chan Record) {
	ctx := context.Background()
	format, r, err := archives.Identify(ctx, "", r)
	if err != nil {
		out <- Record{Error: err}
		return
	}

	decomp, ok := format.(archives.Decompressor)
	if !ok {
		out <- Record{Error: errors.New("downloaded index is not a valid compressed file")}
		return
	}

	dr, err := decomp.OpenReader(r)
	if err != nil {
		out <- Record{Error: err}
		return
	}
	defer dr.Close()

	br := bufio.NewReader(dr)
	var currentPkg string

	for {
		line, err := br.ReadString('\n')
		if errors.Is(err, io.EOF) {
			close(out)
			break
		} else if err != nil {
			out <- Record{Error: err}
			return
		}
		line = strings.TrimSpace(line)

		switch {
		case strings.HasPrefix(line, "<file"):
			// Skip directories and symlinks
			if strings.HasPrefix(line[5:], `type="dir"`) || line[5] == 'l' {
				continue
			}

			start := strings.IndexByte(line, '>') + 1
			end := strings.LastIndexByte(line, '<')
			fpath := line[start:end]

			if strings.Contains(fpath, ".build-id") {
				continue
			}

			out <- Record{
				Name: currentPkg,
				Tags: tags.Generate(fpath),
			}
		case strings.HasPrefix(line, "<package"):
			start := strings.LastIndex(line, `name="`) + 6
			end := start + strings.IndexByte(line[start:], '"')
			currentPkg = line[start:end]
		default:
			continue
		}
	}
}
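Unlike the APT importer, DNF's IndexURL has to make a network request of its own: it downloads repomd.xml for the requested release, repo, and architecture, and returns the filelists.xml.gz location advertised there. A sketch under an assumed Fedora mirror layout (the host and release below are illustrative):

	// Fetches .../pub/fedora/linux/releases/40/Everything/x86_64/os/repodata/repomd.xml
	// and returns the advertised filelists.xml.gz URL under the same os/ directory.
	urls, err := index.DNF{}.IndexURL("https://dl.fedoraproject.org", "40", "Everything", "x86_64")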
60
internal/index/index.go
Normal file
@@ -0,0 +1,60 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package index

import (
	"fmt"
	"io"
)

// Record represents a data record for a single package
type Record struct {
	Name  string
	Tags  []string
	Error error
}

type Importer interface {
	// Name returns the name of the importer
	Name() string
	// IndexURL generates a list of possible index URLs to try
	IndexURL(baseURL, version, repo, arch string) ([]string, error)
	// ReadPkgData reads data from an index file and sends it on out
	ReadPkgData(r io.Reader, out chan Record)
}

var importers = []Importer{
	APT{},
	DNF{},
	Pacman{},
}

// GetImporter gets an importer by its name
func GetImporter(name string) (Importer, error) {
	for _, importer := range importers {
		if importer.Name() == name {
			return importer, nil
		}
	}
	return nil, fmt.Errorf("no such importer: %q", name)
}
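An importer is consumed by looking it up by name, opening one of its index URLs, and ranging over the Record channel; ReadPkgData closes the channel on EOF, so the loop terminates on its own. A sketch that reads an already-downloaded index file (the file name is illustrative):

package main

import (
	"fmt"
	"os"

	"go.elara.ws/distrohop/internal/index"
)

func main() {
	imp, err := index.GetImporter("apt")
	if err != nil {
		panic(err)
	}
	f, err := os.Open("Contents-amd64.gz")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	out := make(chan index.Record)
	go imp.ReadPkgData(f, out)
	for rec := range out {
		if rec.Error != nil {
			panic(rec.Error)
		}
		fmt.Println(rec.Name, rec.Tags)
	}
}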
145
internal/index/pacman.go
Normal file
@@ -0,0 +1,145 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package index

import (
	"archive/tar"
	"bufio"
	"bytes"
	"context"
	"errors"
	"io"
	"net/url"
	"os"
	"path"
	"strings"

	"github.com/mholt/archives"
	"go.elara.ws/distrohop/internal/tags"
)

type Pacman struct{}

func (Pacman) Name() string {
	return "pacman"
}

func (Pacman) IndexURL(baseURL, version, repo, arch string) ([]string, error) {
	baseURL = os.Expand(baseURL, func(s string) string {
		switch s {
		case "repo":
			return repo
		case "arch":
			return arch
		}
		return "$" + s
	})

	u, err := url.ParseRequestURI(baseURL)
	if err != nil {
		return nil, err
	}
	filePath, err := url.JoinPath(u.Path, repo+".files")
	if err != nil {
		return nil, err
	}
	u.Path = filePath
	return []string{u.String()}, nil
}

func (Pacman) ReadPkgData(r io.Reader, out chan Record) {
	ctx := context.Background()
	format, r, err := archives.Identify(ctx, "", r)
	if err != nil {
		out <- Record{Error: err}
		return
	}

	decomp, ok := format.(archives.Decompressor)
	if !ok {
		out <- Record{Error: errors.New("downloaded index is not a valid compressed file")}
		return
	}

	dr, err := decomp.OpenReader(r)
	if err != nil {
		out <- Record{Error: err}
		return
	}
	defer dr.Close()

	tr := tar.NewReader(dr)
	var currentPkg string

	for {
		hdr, err := tr.Next()
		if errors.Is(err, io.EOF) {
			close(out)
			break
		} else if err != nil {
			out <- Record{Error: err}
			return
		}

		switch path.Base(hdr.Name) {
		case "desc":
			data, err := io.ReadAll(tr)
			if err != nil {
				out <- Record{Error: err}
				return
			}

			labelIdx := bytes.Index(data, []byte("%NAME%\n"))
			if labelIdx == -1 {
				continue
			}

			start := labelIdx + 7
			end := start + bytes.IndexByte(data[start:], '\n')
			currentPkg = string(data[start:end])
		case "files":
			br := bufio.NewReader(tr)
			for {
				fpath, err := br.ReadString('\n')
				if errors.Is(err, io.EOF) {
					break
				} else if err != nil {
					out <- Record{Error: err}
					return
				}

				fpath = strings.TrimSpace(fpath)
				if fpath == "%FILES%" || strings.HasSuffix(fpath, "/") {
					continue
				}

				fpath = "/" + fpath

				out <- Record{
					Name: currentPkg,
					Tags: tags.Generate(fpath),
				}
			}
		}
	}
}
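The pacman importer expands $repo and $arch in the base URL and derives the .files database name from the repo; the version argument is unused. A sketch with an illustrative Arch mirror:

	urls, err := index.Pacman{}.IndexURL("https://geo.mirror.pkgbuild.com/$repo/os/$arch", "", "core", "x86_64")
	// urls[0]: https://geo.mirror.pkgbuild.com/core/os/x86_64/core.files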
208
internal/pull/pull.go
Normal file
@@ -0,0 +1,208 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package pull

import (
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/zeebo/sbloom"
	"go.elara.ws/distrohop/internal/index"
	"go.elara.ws/distrohop/internal/store"
)

const batchSize = 5000

// ErrUpToDate is returned when a repository index is already
// up to date and doesn't require a pull.
var ErrUpToDate = errors.New("repository is already up to date")

// Options represents settings for pull operations
type Options struct {
	BaseURL      string
	Version      string
	Repo         string
	Architecture string
	ProgressFunc func(title string, received, total int64)
}

// progressReader keeps track of download progress and calls
// progressFn with the current progress data.
type progressReader struct {
	r          io.Reader
	title      string
	received   int64
	total      int64
	progressFn func(title string, received, total int64)
}

func (pr *progressReader) Read(b []byte) (int, error) {
	n, err := pr.r.Read(b)
	if err != nil {
		return n, err
	}
	pr.received += int64(n)
	pr.progressFn(pr.title, pr.received, pr.total)
	return n, nil
}

// Pull synchronizes a repository index from a remote repository and atomically updates the store.
// If the index is already up to date, it returns [ErrUpToDate]. If opts.ProgressFunc is set,
// Pull will call it continuously with the current progress of the pull operation. The original store
// remains usable and unmodified until the pull operation completes successfully. It will only be
// blocked for the duration of the atomic replacement operation.
func Pull(opts Options, s *store.Store, importer index.Importer) error {
	indexURLs, err := importer.IndexURL(opts.BaseURL, opts.Version, opts.Repo, opts.Architecture)
	if err != nil {
		return err
	}

	var (
		res  *http.Response
		errs []error
	)
	for _, indexURL := range indexURLs {
		ires, err := http.Get(indexURL)
		if err != nil {
			continue
		}

		if ires.StatusCode != 200 {
			errs = append(errs, fmt.Errorf("http: %s", ires.Status))
			continue
		} else {
			res = ires
			break
		}
	}

	if res == nil {
		return errors.Join(errs...)
	} else {
		defer res.Body.Close()
	}

	repoKey := strings.Trim(opts.Version+"/"+opts.Repo+"/"+opts.Architecture, "/")

	if meta, err := s.GetMeta(); err == nil {
		// If the ETag stored in the database is the same as the one we got from the
		// HTTP response, the repo is up to date.
		if etag := res.Header.Get("ETag"); etag != "" && etag == meta.ETag {
			return ErrUpToDate
		}

		if lastModStr := res.Header.Get("Last-Modified"); lastModStr != "" && !meta.LastModified.IsZero() {
			lastMod, err := time.Parse(time.RFC1123, lastModStr)
			// If the last modified time from the HTTP response is before
			// or equal to the time in the database, the repo is up to date.
			if err == nil && meta.LastModified.Compare(lastMod) >= 0 {
				return ErrUpToDate
			}
		}
	}

	dir, err := os.MkdirTemp(filepath.Dir(s.Path), "distrohop-pull.*")
	if err != nil {
		return err
	}

	s2, err := store.Open(dir)
	if err != nil {
		return err
	}

	var r io.Reader = res.Body
	if opts.ProgressFunc != nil {
		r = &progressReader{
			r:          res.Body,
			title:      repoKey,
			total:      res.ContentLength,
			progressFn: opts.ProgressFunc,
		}
	}

	out := make(chan index.Record)
	go importer.ReadPkgData(r, out)

	filters := map[byte]*sbloom.Filter{}

	i := 0
	collected := make(map[string]index.Record, batchSize)
	for rec := range out {
		if rec.Error != nil {
			return rec.Error
		}

		curRec, ok := collected[rec.Name]
		if !ok {
			collected[rec.Name] = rec
		} else {
			curRec.Tags = append(curRec.Tags, rec.Tags...)
			collected[rec.Name] = curRec
		}

		if i >= batchSize {
			err = s2.WriteBatch(collected, filters)
			if err != nil {
				return err
			}
			clear(collected)
			i = 0
		}

		i++
	}

	if len(collected) != 0 {
		err = s2.WriteBatch(collected, filters)
		if err != nil {
			return err
		}
	}

	err = s2.WriteFilters(filters)
	if err != nil {
		return err
	}

	meta := store.RepoMeta{ETag: res.Header.Get("ETag")}

	if lastMod := res.Header.Get("Last-Modified"); lastMod != "" {
		meta.LastModified, err = time.Parse(time.RFC1123, lastMod)
		if err != nil {
			return err
		}
	}

	if err := s2.WriteMeta(meta); err != nil {
		return err
	}

	return s.Replace(s2)
}
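A sketch of a typical pull, assuming s is a *store.Store obtained from store.Open and that the repository values come from the config (the mirror shown here is illustrative):

	err := pull.Pull(pull.Options{
		BaseURL:      "http://deb.debian.org/debian",
		Version:      "bookworm",
		Repo:         "main",
		Architecture: "amd64",
		ProgressFunc: func(title string, received, total int64) {
			fmt.Printf("\r%s: %d/%d bytes", title, received, total)
		},
	}, s, index.APT{})
	if errors.Is(err, pull.ErrUpToDate) {
		fmt.Println("index is already up to date")
	}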
72
internal/store/cached/cached.go
Normal file
@@ -0,0 +1,72 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package cached

import (
	"strings"
	"time"

	"github.com/patrickmn/go-cache"
	"go.elara.ws/distrohop/internal/store"
)

var _ store.ReadOnly = (*Store)(nil)

// cacheRecord represents a single item stored in the cache
type cacheRecord struct {
	results []store.TagResult
	latency time.Duration
}

// Store represents a cached store that caches search results from [go.elara.ws/distrohop/internal/store.ReadOnly] instances.
// It implements [go.elara.ws/distrohop/internal/store.ReadOnly].
type Store struct {
	store.ReadOnly
	cache *cache.Cache
}

// New creates a new cached store with the provided cache settings and underlying store.
func New(s store.ReadOnly, exp, cleanup time.Duration) Store {
	return Store{
		ReadOnly: s,
		cache:    cache.New(exp, cleanup),
	}
}

// Search retrieves cached search results for the given tags. If the search doesn't exist
// in the cache, it queries the underlying store and adds the results to the cache.
func (cs Store) Search(tags []string) ([]store.TagResult, time.Duration, error) {
	cacheKey := strings.Join(tags, "\x1F")
	if results, ok := cs.cache.Get(cacheKey); ok {
		record := results.(cacheRecord)
		return record.results, record.latency, nil
	}
	res, latency, err := cs.ReadOnly.Search(tags)
	if err != nil {
		return nil, 0, err
	}
	if len(res) != 0 {
		cs.cache.Set(cacheKey, cacheRecord{res, latency}, cache.DefaultExpiration)
	}
	return res, latency, nil
}
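Because the cached store embeds the underlying ReadOnly, only Search is intercepted; GetPkg and GetPkgNamesByPrefix pass straight through. A sketch with illustrative expiration and cleanup intervals:

	cs := cached.New(s, 10*time.Minute, 15*time.Minute) // s is any store.ReadOnly
	results, latency, err := cs.Search([]string{"bin=curl"})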
134
internal/store/combined/combined.go
Normal file
@@ -0,0 +1,134 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package combined

import (
	"errors"
	"fmt"
	"slices"
	"sync"
	"time"

	"github.com/cockroachdb/pebble"
	"go.elara.ws/distrohop/internal/store"
	"golang.org/x/sync/errgroup"
)

var _ store.ReadOnly = (*Store)(nil)

var ErrNotFound = errors.New("no such package")

// Store represents a combined store that aggregates multiple individual [go.elara.ws/distrohop/internal/store.Store] instances.
// It implements [go.elara.ws/distrohop/internal/store.ReadOnly].
type Store struct {
	Stores []store.ReadOnly
}

// New creates a new combined store with the provided individual stores.
func New(stores ...store.ReadOnly) *Store {
	return &Store{stores}
}

// Add adds a new store to the combined store.
func (cs *Store) Add(s store.ReadOnly) {
	cs.Stores = append(cs.Stores, s)
}

// GetPkg retrieves a package by name from any of the stores in the combined store.
// If the package is not found in any store, it returns an error wrapping [ErrNotFound].
func (cs *Store) GetPkg(name string) (out store.Package, err error) {
	mtx := &sync.Mutex{}
	wg := &errgroup.Group{}
	for _, s := range cs.Stores {
		wg.Go(func() error {
			if pkg, err := s.GetPkg(name); err == nil {
				mtx.Lock()
				out = pkg
				mtx.Unlock()
			} else if !errors.Is(err, pebble.ErrNotFound) {
				return err
			}
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		return out, err
	} else if out.Name == "" {
		return out, fmt.Errorf("%w: %q", ErrNotFound, name)
	} else {
		return out, nil
	}
}

// GetPkgNamesByPrefix retrieves package names that match the given prefix from all stores.
// It returns a slice of package names limited to the specified number n.
func (cs *Store) GetPkgNamesByPrefix(prefix string, n int) (out []string, err error) {
	mtx := &sync.Mutex{}
	wg := &errgroup.Group{}
	for _, s := range cs.Stores {
		wg.Go(func() error {
			names, err := s.GetPkgNamesByPrefix(prefix, n)
			if err != nil {
				return err
			}
			mtx.Lock()
			out = append(out, names...)
			mtx.Unlock()
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		return nil, err
	}
	slices.Sort(out)
	if len(out) > n {
		out = out[:n]
	}
	return out, nil
}

// Search searches for packages across all stores based on the provided tags.
// It returns a slice of search results and an error.
func (cs *Store) Search(tags []string) (out []store.TagResult, latency time.Duration, err error) {
	mtx := &sync.Mutex{}
	wg := &errgroup.Group{}
	for _, s := range cs.Stores {
		wg.Go(func() error {
			results, dur, err := s.Search(tags)
			if err != nil {
				return err
			}
			mtx.Lock()
			latency += dur
			out = append(out, results...)
			mtx.Unlock()
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		return nil, latency, err
	} else {
		store.SortResults(out)
		return out, latency, nil
	}
}
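A sketch of aggregating two per-distribution stores, assuming both were opened elsewhere with store.Open:

	all := combined.New(debianStore, archStore)
	results, latency, err := all.Search([]string{"bin=curl"})
	pkg, err := all.GetPkg("curl")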
236
internal/store/search.go
Normal file
@@ -0,0 +1,236 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package store

import (
	"encoding/gob"
	"errors"
	"fmt"
	"regexp"
	"slices"
	"strings"
	"sync"
	"time"

	"github.com/cespare/xxhash/v2"
	"github.com/cockroachdb/pebble"
)

func init() {
	gob.Register(&xxhash.Digest{})
}

var ErrInvalidTag = errors.New("invalid tag format")

var (
	// startChars is a list of all the possible package name starting characters
	startChars = [...]byte{
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	}
	// iterOpts contains iterator options with bounds defined such that
	// they cover all packages starting with each character defined
	// in startChars
	iterOpts = make([]*pebble.IterOptions, len(startChars))
	// tagRegex validates the format of a given tag
	tagRegex = regexp.MustCompile(`\w+=.+`)
)

func init() {
	// Populate the iterOpts slice
	for i, char := range startChars {
		iterOpts[i] = &pebble.IterOptions{
			LowerBound: []byte{char},
			UpperBound: []byte{char + 1},
		}
	}
}

// TagResult represents the result of a tag search, including confidence and overlapping tags.
type TagResult struct {
	// The confidence score for the tag match. This value will always be between 0 and 1.
	Confidence float32
	// A list of overlapping tags
	Overlap []string
	// The package associated with the tag result
	Package Package
}

// Search searches for packages in the store that match the given tags.
// Each tag must be in the format "key=value", and an error is returned
// if any tag does not conform to this format. The function spawns multiple
// worker goroutines (defined by s.SearchThreads) to perform a concurrent search.
// The result is a list of [TagResult] structs representing the matching packages.
func (s *Store) Search(tags []string) ([]TagResult, time.Duration, error) {
	start := time.Now()
	for _, tag := range tags {
		if !tagRegex.MatchString(tag) {
			return nil, 0, fmt.Errorf("%w: %q", ErrInvalidTag, tag)
		}
	}

	optsMtx := &sync.Mutex{}
	opts := iterOpts

	var results []TagResult
	resultsMtx := &sync.Mutex{}
	wg := &sync.WaitGroup{}
	errs := make(chan error)
	for range s.SearchThreads {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				optsMtx.Lock()
				if len(opts) == 0 {
					// If we have no more options structs left,
					// we can exit the goroutine
					optsMtx.Unlock()
					return
				}
				opt := opts[0]
				opts = opts[1:]
				optsMtx.Unlock()

				found := false
				if filter, err := s.GetFilter(opt.LowerBound[0]); err == nil {
					for _, tag := range tags {
						if filter.Lookup(unsafeBytes(tag)) {
							found = true
							break
						}
					}
				} else if !errors.Is(err, pebble.ErrNotFound) {
					errs <- err
					return
				}

				// Skip the current chunk if the bloom filter
				// doesn't contain any of the tags, or if it doesn't
				// exist, which indicates that there are no packages
				// with the starting character we're looking for.
				if !found {
					continue
				}

				// Create a new iterator that scans through the range defined in opt
				iter, err := s.db.NewIter(opt)
				if err != nil {
					errs <- err
					return
				}

				var out []TagResult
				for iter.First(); iter.Valid(); iter.Next() {
					val, err := iter.ValueAndErr()
					if err != nil {
						errs <- err
						iter.Close()
						return
					}

					// Convert the tag data to a string using an unsafe operation
					// so that we can split it by the unit separator character
					// and check if it has overlap without incurring the cost
					// of copying the value for a string conversion.
					//
					// If we find that there's overlap, we'll copy the data
					// later, before returning it.
					ptags := strings.Split(unsafeString(val), "\x1F")
					overlapTags, conf := overlap(tags, ptags)
					if conf == 0 {
						// If the confidence is zero, there's no overlap,
						// so we can continue to the next value
						continue
					}

					out = append(out, TagResult{
						Confidence: conf,
						Overlap:    overlapTags,
						Package: Package{
							Name: string(iter.Key()),
							// We need to do a deep copy here because we previously
							// used an unsafe operation to convert the tag data to
							// a string, and the values created by that will be
							// invalidated when the iterator is closed.
							Tags: cloneStringSlice(ptags),
						},
					})
				}

				if err := iter.Error(); err != nil {
					errs <- err
					iter.Close()
					return
				}

				iter.Close()
				resultsMtx.Lock()
				results = append(results, out...)
				resultsMtx.Unlock()
			}
		}()
	}

	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()

	select {
	case err := <-errs:
		if err != nil {
			return nil, 0, err
		}
	case <-done:
		SortResults(results)
		return results, time.Since(start), nil
	}

	SortResults(results)
	return results, time.Since(start), nil
}

// SortResults sorts tag results by confidence
func SortResults(results []TagResult) {
	slices.SortFunc(results, func(a, b TagResult) int {
		if a.Confidence < b.Confidence {
			return 1
		} else if a.Confidence > b.Confidence {
			return -1
		} else {
			return strings.Compare(a.Package.Name, b.Package.Name)
		}
	})
}

// cloneStringSlice creates a deep copy of a slice of strings
func cloneStringSlice(s []string) []string {
	out := make([]string, len(s))
	for i := 0; i < len(s); i++ {
		out[i] = strings.Clone(s[i])
	}
	return out
}
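Confidence is the fraction of the query tags that a package's tag set contains, so a two-tag query yields 1.0 for a package matching both tags and 0.5 for a package matching only one. A sketch (the tag values are illustrative):

	results, latency, err := s.Search([]string{"bin=rg", "man=rg.1"})
	// A package carrying both tags gets Confidence 1.0 and sorts first;
	// a package carrying only one of them gets Confidence 0.5.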
389
internal/store/store.go
Normal file
@@ -0,0 +1,389 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package store

import (
	"bytes"
	"encoding/gob"
	"encoding/json"
	"errors"
	"os"
	"path/filepath"
	"slices"
	"strings"
	"sync"
	"time"
	"unsafe"

	"github.com/cespare/xxhash/v2"
	"github.com/cockroachdb/pebble"
	"github.com/zeebo/sbloom"
	"go.elara.ws/distrohop/internal/index"
)

// ErrBlocked is returned when a store's database is being updated
var ErrBlocked = errors.New("database is being updated; please try again later")

func init() {
	gob.Register(&xxhash.Digest{})
}

// Package represents a software package with a name and associated tags
type Package struct {
	// The name of the package
	Name string
	// A list of tags associated with the package
	Tags []string
}

type nopLogger struct{}

func (nopLogger) Infof(string, ...any)  {}
func (nopLogger) Fatalf(string, ...any) {}

// ReadOnly represents a read-only package store
type ReadOnly interface {
	GetPkg(name string) (Package, error)
	GetPkgNamesByPrefix(prefix string, n int) ([]string, error)
	Search(tags []string) ([]TagResult, time.Duration, error)
}

// Store represents persistent storage for package data
type Store struct {
	Path string
	db   *pebble.DB

	// blocked is used to ensure that all other operations finish
	// running before a [Store.Replace] operation, and cannot
	// start running until the replace operation is completed.
	// [Store.Replace] does a write lock on the RWMutex. All
	// other operations do read locks.
	blocked sync.RWMutex

	// SearchThreads is the number of worker goroutines to be used
	// for searching the database for a tag. The default is 4.
	SearchThreads int
}

// Open initializes and opens a [Store] at the specified path
func Open(path string) (*Store, error) {
	db, err := pebble.Open(path, &pebble.Options{Logger: nopLogger{}})
	if err != nil {
		return nil, err
	}
	return &Store{
		Path:          path,
		db:            db,
		SearchThreads: 4,
	}, err
}

// WriteBatch writes a batch of index records to the store.
// It merges existing tags with new ones and ensures they're unique.
func (s *Store) WriteBatch(batch map[string]index.Record, filters map[byte]*sbloom.Filter) error {
	if !s.blocked.TryRLock() {
		return ErrBlocked
	}
	defer s.blocked.RUnlock()

	b := s.db.NewBatch()
	defer b.Close()

	for _, item := range batch {
		if len(item.Name) == 0 || len(item.Tags) == 0 {
			continue
		}

		key := unsafeBytes(item.Name)

		curVal, cl, err := s.db.Get(key)
		if err == pebble.ErrNotFound {
			// Remove any duplicate tags
			slices.Sort(item.Tags)
			tags := slices.Compact(item.Tags)
			// Write the new package to the database
			err := b.Set(key, joinTags(item.Name[0], tags, filters), nil)
			if err != nil {
				return err
			}
		} else if err != nil {
			return err
		} else {
			// Since the package already exists in the database, combine its existing
			// tags with the ones we just got
			tags := strings.Split(unsafeString(curVal), "\x1F")
			tags = append(tags, item.Tags...)
			// Remove any duplicate tags
			slices.Sort(tags)
			tags = slices.Compact(tags)
			// Write the updated package to the database
			err := b.Set(key, joinTags(item.Name[0], tags, filters), nil)
			if err != nil {
				cl.Close()
				return err
			}
			cl.Close()
		}
	}
	// Commit the batch to persistent storage
	return b.Commit(nil)
}

// WriteFilters writes bloom filters for each package name starting character
// to the database.
func (s *Store) WriteFilters(filters map[byte]*sbloom.Filter) error {
	if !s.blocked.TryRLock() {
		return ErrBlocked
	}
	defer s.blocked.RUnlock()

	for firstChar, filter := range filters {
		data, err := filter.GobEncode()
		if err != nil {
			return err
		}

		err = s.db.Set([]byte{0x02, firstChar}, data, nil)
		if err != nil {
			return err
		}
	}
	return nil
}

// GetFilter gets the bloom filter for the given first package name character
// from the database.
func (s *Store) GetFilter(firstChar byte) (*sbloom.Filter, error) {
	if !s.blocked.TryRLock() {
		return nil, ErrBlocked
	}
	defer s.blocked.RUnlock()

	data, cl, err := s.db.Get([]byte{0x02, firstChar})
	if err != nil {
		return nil, err
	}
	defer cl.Close()

	filter := &sbloom.Filter{}
	err = filter.GobDecode(data)
	if err != nil {
		return nil, err
	}

	return filter, nil
}

// joinTags converts the given tags to bytes, joins them with \x1F as the separator,
// and updates the correct bloom filter for the first character of the package name.
func joinTags(firstChar byte, tags []string, filters map[byte]*sbloom.Filter) []byte {
	if _, ok := filters[firstChar]; !ok {
		filters[firstChar] = sbloom.NewFilter(xxhash.New(), 10)
	}
	out := &bytes.Buffer{}
	for i, tag := range tags {
		btag := unsafeBytes(tag)
		filters[firstChar].Add(btag)
		out.Write(btag)
		if i != len(tags)-1 {
			out.WriteByte(0x1F)
		}
	}
	return out.Bytes()
}

// GetPkg retrieves a package from the store by its name
func (s *Store) GetPkg(name string) (Package, error) {
	if !s.blocked.TryRLock() {
		return Package{}, ErrBlocked
	}
	defer s.blocked.RUnlock()

	data, cl, err := s.db.Get(unsafeBytes(name))
	if err != nil {
		return Package{}, err
	}
	defer cl.Close()

	return Package{
		Name: name,
		Tags: strings.Split(string(data), "\x1F"),
	}, nil
}

func (s *Store) GetPkgNamesByPrefix(prefix string, n int) ([]string, error) {
	if !s.blocked.TryRLock() {
		return nil, ErrBlocked
	}
	defer s.blocked.RUnlock()

	out := make([]string, 0, n)

	iter, err := s.db.NewIter(&pebble.IterOptions{
		LowerBound: unsafeBytes(prefix),
		UpperBound: append(unsafeBytes(prefix[:len(prefix)-1]), prefix[len(prefix)-1]+1),
	})
	if err != nil {
		return nil, err
	}
	defer iter.Close()

	i := 0
	for iter.First(); iter.Valid(); iter.Next() {
		if i == n-1 {
			break
		}
		out = append(out, string(iter.Key()))
		i++
	}

	if err := iter.Error(); err != nil {
		return nil, err
	}

	return out, nil
}

// metaKey is the database key for repository metadata
var metaKey = []byte("\x02META")

// RepoMeta represents repository metadata
type RepoMeta struct {
	ETag         string
	LastModified time.Time
}

// WriteMeta writes the repository metadata to the database
func (s *Store) WriteMeta(meta RepoMeta) error {
	if !s.blocked.TryRLock() {
		return ErrBlocked
	}
	defer s.blocked.RUnlock()

	data, err := json.Marshal(meta)
	if err != nil {
		return err
	}
	return s.db.Set(metaKey, data, nil)
}

// GetMeta reads the repository metadata from the database
func (s *Store) GetMeta() (RepoMeta, error) {
	if !s.blocked.TryRLock() {
		return RepoMeta{}, ErrBlocked
	}
	defer s.blocked.RUnlock()

	data, cl, err := s.db.Get(metaKey)
	if err != nil {
		return RepoMeta{}, err
	}
	defer cl.Close()
	var meta RepoMeta
	err = json.Unmarshal(data, &meta)
	return meta, err
}

// Replace atomically replaces the database from s with the database from s2.
// The store is blocked during the replacement, causing any concurrent operations
// to fail with [ErrBlocked]. The replacement operation closes and moves s2's
// database, so s2 is no longer usable after this operation.
//
// This function attempts to roll back in case of partial failures. However, cleanup
// failures may result in leftover temporary files.
func (s *Store) Replace(s2 *Store) error {
	// Clean up any leftover old db files. We don't need to lock at this
	// point because concurrent operations are still safe to execute.
	oldPath := filepath.Join(filepath.Dir(s.Path), "db-old")
	if err := os.RemoveAll(oldPath); err != nil {
		return err
	}

	// Do a write lock, which will prevent any new operations
	// from executing and block until all existing operations
	// complete.
	s.blocked.Lock()

	if err := s2.db.Close(); err != nil {
		s.blocked.Unlock()
		return err
	}
	if err := s.db.Close(); err != nil {
		s.blocked.Unlock()
		return err
	}

	if err := os.Rename(s.Path, oldPath); err != nil {
		s.blocked.Unlock()
		return err
	}
	if err := os.Rename(s2.Path, s.Path); err != nil {
		s.blocked.Unlock()
		return errors.Join(err, os.Rename(oldPath, s.Path))
	}

	db, err := pebble.Open(s.Path, &pebble.Options{Logger: nopLogger{}})
	if err != nil {
		s.blocked.Unlock()
		return err
	}
	s.db = db

	// We can unlock here even though there's more work to do because the replace
	// operation itself is complete and concurrent operations are now safe to
	// execute again.
	s.blocked.Unlock()

	return os.RemoveAll(oldPath)
}

// Close closes the underlying database
func (s *Store) Close() error {
	if !s.blocked.TryRLock() {
		return ErrBlocked
	}
	defer s.blocked.RUnlock()
	return s.db.Close()
}

// overlap calculates the overlap between two sets of tags.
// It returns the list of overlapping tags and a confidence score.
func overlap(stags, ptags []string) ([]string, float32) {
	var overlapTags []string
	for _, stag := range stags {
		if slices.Contains(ptags, stag) {
			overlapTags = append(overlapTags, stag)
		}
	}
	return overlapTags, float32(len(overlapTags)) / float32(len(stags))
}

// unsafeBytes converts a string to a byte slice using unsafe operations
func unsafeBytes(data string) []byte {
	return unsafe.Slice(unsafe.StringData(data), len(data))
}

// unsafeString converts a byte slice to a string using unsafe operations
func unsafeString(data []byte) string {
	return unsafe.String(unsafe.SliceData(data), len(data))
}
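Package names are stored directly as keys, values are the \x1F-joined tags, bloom filters live under {0x02, firstChar}, and repository metadata under "\x02META". A minimal write-then-read sketch (the path and package data are illustrative):

package main

import (
	"fmt"

	"github.com/zeebo/sbloom"
	"go.elara.ws/distrohop/internal/index"
	"go.elara.ws/distrohop/internal/store"
)

func main() {
	s, err := store.Open("/tmp/distrohop-db")
	if err != nil {
		panic(err)
	}
	defer s.Close()

	// Write a single package along with the bloom filters it populates.
	filters := map[byte]*sbloom.Filter{}
	batch := map[string]index.Record{
		"curl": {Name: "curl", Tags: []string{"bin=curl", "man=curl.1"}},
	}
	if err := s.WriteBatch(batch, filters); err != nil {
		panic(err)
	}
	if err := s.WriteFilters(filters); err != nil {
		panic(err)
	}

	pkg, err := s.GetPkg("curl")
	if err != nil {
		panic(err)
	}
	fmt.Println(pkg.Name, pkg.Tags) // curl [bin=curl man=curl.1]
}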
168
internal/tags/tags.go
Normal file
@@ -0,0 +1,168 @@
/*
 * distrohop - A utility for correlating and identifying equivalent software
 * packages across different Linux distributions
 *
 * Copyright (C) 2025 Elara Ivy <elara@elara.ws>
 *
 * This file is part of distrohop.
 *
 * distrohop is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * distrohop is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with distrohop. If not, see <http://www.gnu.org/licenses/>.
 */

package tags

import (
	"path"
	"strings"
)

// Generate generates a list of tags based on the input filename.
func Generate(filePath string) (tags []string) {
	lastSlash := strings.LastIndexByte(filePath, '/')
	name, dir := filePath[lastSlash+1:], filePath[:lastSlash]
	pathElems := strings.Split(dir, "/")
	added := false
	for _, elem := range pathElems {
		switch elem {
		case "usr", "opt", "local", "share":
			// Skip directories that we don't care about
			continue
		case "bin", "sbin":
			tags = append(tags, "bin="+name)
			added = true
		case "icons", "pixmaps":
			switch path.Ext(name) {
			case ".svg", ".png", ".jpg", ".jpeg":
				tags = append(tags, "icon="+name)
				added = true
			}
		case "man":
			if manName := manualName(name); manName != "" {
				tags = append(tags, "man="+manName)
				added = true
			}
		case "dist-packages", "site-packages":
			if pyName := pythonName(filePath); pyName != "" {
				tags = append(tags, "py="+pyName)
				added = true
			}
		case "pkgconfig", "pkg-config":
			if path.Ext(name) == ".pc" {
				tags = append(tags, "pkgcfg="+strings.TrimSuffix(name, ".pc"))
				added = true
			}
		case "applications":
			if path.Ext(name) == ".desktop" {
				tags = append(tags, "desktop="+strings.TrimSuffix(name, ".desktop"))
				added = true
			}
		case "dbus-1":
			if path.Ext(name) == ".service" {
				tags = append(tags, "dbus="+strings.TrimSuffix(name, ".service"))
				added = true
			}
		case "systemd":
			switch path.Ext(name) {
			case ".service", ".target", ".socket", ".timer":
				tags = append(tags, "systemd="+name)
				added = true
			}
		case "include":
			switch path.Ext(name) {
			case ".h", ".hh", ".hpp", ".hxx", ".h++":
				_, hdrName, ok := strings.Cut(filePath, "include/")
				if !ok {
					hdrName = name
				}
				tags = append(tags, "hdr="+hdrName)
				added = true
			}
		case "lib", "lib32", "lib64":
			if libName, soversion, ok := strings.Cut(name, ".so"); ok && soversionIsValid(soversion) {
				tags = append(tags, "lib="+name)
				lastChar := name[len(name)-1]
				if lastChar >= '0' || lastChar <= '9' {
					tags = append(tags, "lib="+libName+".so")
					canonicalLibName := strings.TrimPrefix(libName, "lib")
					tags = append(tags, "lib="+canonicalLibName)
				}
				added = true
			} else if path.Ext(name) == ".a" {
				tags = append(tags, "lib="+name)
				tags = append(tags, "lib="+strings.TrimSuffix(name, ".a"))
				added = true
			}
		default:
			continue
		}

		if added {
			break
		}
	}

	if !added {
		tags = append(tags, "file="+filePath)
	}

	return tags
}

func manualName(fileName string) string {
	fileName = strings.TrimSuffix(fileName, ".gz")
	ext := path.Ext(fileName)
	if len(ext) == 0 || !isNum(ext[1:]) {
		return ""
	}
	return fileName
}

func pythonName(filePath string) string {
	for _, start := range [...]string{"/dist-packages/", "/site-packages/"} {
		start := strings.Index(filePath, start)
		if start == -1 {
			continue
		}
		start += 15
		end := strings.Index(filePath[start:], "/")
		if end == -1 {
			continue
		}
		return filePath[start : start+end]
	}
	return ""
}

func soversionIsValid(s string) bool {
	if s == "" {
		return true
	}

	for _, elem := range strings.Split(s, ".") {
		if !isNum(elem) {
			return false
		}
	}

	return true
}

func isNum(s string) bool {
	for i := range s {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
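A few examples of what Generate produces for typical file paths (derived from the rules above; the paths themselves are illustrative):

	tags.Generate("/usr/bin/curl")                 // [bin=curl]
	tags.Generate("/usr/share/man/man1/curl.1.gz") // [man=curl.1]
	tags.Generate("/usr/lib/libssl.so.3")          // [lib=libssl.so.3 lib=libssl.so lib=ssl]
	tags.Generate("/etc/curlrc")                   // [file=/etc/curlrc]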