利用DeepSeek用两种方法编写go语言zstd实用程序
1.使用github.com/valyala/gozstd包
这是一个用go语言包装c语言zstd源代码的包,DeepSeek给出的调用此包的程序如下:
package mainimport ("flag""fmt"
// "io""os""path/filepath"
// "github.com/valyala/gozstd""gozstd"
)var (compressLevel intdecompress boolforce booloutputFile string
)func init() {flag.IntVar(&compressLevel, "level", 3, "compression level (1-19)")flag.BoolVar(&decompress, "d", false, "decompress")flag.BoolVar(&force, "f", false, "force overwrite")flag.StringVar(&outputFile, "o", "", "output file")flag.Usage = func() {fmt.Fprintf(os.Stderr, "gozstd version - Usage: %s [OPTIONS] INPUT [OUTPUT]\n", os.Args[0])flag.PrintDefaults()}
}func main() {flag.Parse()if flag.NArg() < 1 {flag.Usage()os.Exit(1)}inputFile := flag.Arg(0)output := getOutputPath(inputFile)if !force && fileExists(output) {fmt.Fprintf(os.Stderr, "Error: output file exists (use -f to overwrite)\n")os.Exit(1)}var err errorif decompress {err = gozstdDecompress(inputFile, output)} else {if compressLevel < 1 || compressLevel > 19 {fmt.Fprintln(os.Stderr, "Error: invalid compression level (1-19)")os.Exit(1)}err = gozstdCompress(inputFile, output, compressLevel)}if err != nil {fmt.Fprintf(os.Stderr, "Error: %v\n", err)os.Exit(1)}
}func gozstdCompress(input, output string, level int) error {data, err := os.ReadFile(input)if err != nil {return err}compressed := gozstd.CompressLevel(nil, data, level)return os.WriteFile(output, compressed, 0644)
}func gozstdDecompress(input, output string) error {compressed, err := os.ReadFile(input)if err != nil {return err}decompressed, err := gozstd.Decompress(nil, compressed)if err != nil {return err}return os.WriteFile(output, decompressed, 0644)
}// Helper functions
func getOutputPath(input string) string {if outputFile != "" {return outputFile}if decompress {return trimZstdExt(input)}return input + ".zst"
}func trimZstdExt(path string) string {ext := filepath.Ext(path)if ext == ".zst" || ext == ".zstd" {return path[:len(path)-len(ext)]}return path
}func fileExists(path string) bool {_, err := os.Stat(path)return !os.IsNotExist(err)
}
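但是我直连github网站有问题,于是用如下命令从镜像网站克隆了github.com/valyala/gozstd源代码,并在其上级目录保存DeepSeek编写的程序gozip.go。把导入路径改为"gozstd"后编译报错,改成相对路径"./gozstd"也不支持,只好将整个gozstd目录复制到编译器提示的目录(/par/go/src/)下。再次编译又提示导入了未使用的包io,将其注释掉后编译通过,压缩和解压测试也正常。解压时提示输出文件已存在(Go的flag包遇到第一个非选项参数就停止解析,写在输入文件名之后的-o v.txt没有生效,输出名仍是默认的varchar.txt),所以把压缩文件改名后再解压,也易于比较文件大小。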
git clone --depth=1 https://gitclone.com/github.com/valyala/gozstd
../go/bin/go build -o gozip gozip.go
gozip.go:10:2: package gozstd is not in std (/par/go/src/gozstd)
../go/bin/go build -o gozip gozip.go
gozip.go:10:2: "./gozstd" is relative, but relative import paths are not supported in module mode
cp gozstd /par/go/src/ -R
../go/bin/go build -o gozip gozip.go
# command-line-arguments
./gozip.go:6:2: "io" imported and not used
ls
gozip.go gozstd
../go/bin/go build -o gozip gozip.go
ls
gozip gozip.go gozstd
./gozip
gozstd version - Usage: ./gozip [OPTIONS] INPUT [OUTPUT]
  -d    decompress
  -f    force overwrite
  -level int
        compression level (1-19) (default 3)
  -o string
        output file
cp ../varchar.txt .
./gozip varchar.txt
./gozip -d varchar.txt.zst -o v.txt
Error: output file exists (use -f to overwrite)
mv varchar.txt.zst v.txt.zst
./gozip -d v.txt.zst
ls -l
total 158428
-rwxr-xr-x 1 root root 3825360 Aug 7 14:35 gozip
-rw-rw-r-- 1 1000 1000 2131 Aug 7 14:35 gozip.go
drwxr-xr-x 5 root root 4096 Aug 7 14:08 gozstd
-rw-r--r-- 1 root root 55966906 Aug 7 14:40 v.txt
-rw-r--r-- 1 root root 46459422 Aug 7 14:37 v.txt.zst
-rw-r--r-- 1 root root 55966906 Aug 7 14:37 varchar.txt
2.使用github.com/klauspost/compress/包
这是一个用纯go语言编写zstd源代码的包,DeepSeek给出的调用此包的程序如下
package mainimport ("flag""fmt""io""os""path/filepath""github.com/klauspost/compress/zstd"
// "compress2/zstd"
)var (compressLevel intdecompress boolforce booloutputFile string
)func init() {flag.IntVar(&compressLevel, "level", 3, "compression level (1-19)")flag.BoolVar(&decompress, "d", false, "decompress")flag.BoolVar(&force, "f", false, "force overwrite")flag.StringVar(&outputFile, "o", "", "output file")flag.Usage = func() {fmt.Fprintf(os.Stderr, "Usage: %s [OPTIONS] INPUT [OUTPUT]\n", os.Args[0])fmt.Fprintln(os.Stderr, "Options:")flag.PrintDefaults()fmt.Fprintln(os.Stderr, "\nExamples:")fmt.Fprintln(os.Stderr, " Compress: zstdutil -level 5 file.txt")fmt.Fprintln(os.Stderr, " Decompress: zstdutil -d file.txt.zst")}
}func main() {flag.Parse()if flag.NArg() < 1 {flag.Usage()os.Exit(1)}inputFile := flag.Arg(0)var output stringif outputFile != "" {output = outputFile} else {if decompress {ext := filepath.Ext(inputFile)if ext != ".zst" && ext != ".zstd" {fmt.Fprintf(os.Stderr, "Error: input file must have .zst or .zstd extension for decompression\n")os.Exit(1)}output = inputFile[:len(inputFile)-len(ext)]} else {output = inputFile + ".zst"}}if !force {if _, err := os.Stat(output); err == nil {fmt.Fprintf(os.Stderr, "Error: output file %s already exists (use -f to overwrite)\n", output)os.Exit(1)}}if decompress {if err := decompressFile(inputFile, output); err != nil {fmt.Fprintf(os.Stderr, "Error: %v\n", err)os.Exit(1)}} else {if compressLevel < 1 || compressLevel > 19 {fmt.Fprintln(os.Stderr, "Error: compression level must be between 1 and 19")os.Exit(1)}if err := compressFile(inputFile, output, compressLevel); err != nil {fmt.Fprintf(os.Stderr, "Error: %v\n", err)os.Exit(1)}}
}func compressFile(input, output string, level int) error {inFile, err := os.Open(input)if err != nil {return err}defer inFile.Close()outFile, err := os.Create(output)if err != nil {return err}defer outFile.Close()encoder, err := zstd.NewWriter(outFile, zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(level)))if err != nil {return err}defer encoder.Close()_, err = io.Copy(encoder, inFile)return err
}func decompressFile(input, output string) error {inFile, err := os.Open(input)if err != nil {return err}defer inFile.Close()outFile, err := os.Create(output)if err != nil {return err}defer outFile.Close()decoder, err := zstd.NewReader(inFile)if err != nil {return err}defer decoder.Close()_, err = io.Copy(outFile, decoder)return err
}
有了前面的经验,直接克隆和复制github镜像库,
git clone --depth=1 https://gitclone.com/github.com/klauspost/compress/
cp compress/zstd /par/go/src/compress -R
注意zstd是compress下的一个子目录,而go安装包中已有/par/go/src/compress目录,所以把子目录zstd复制到/par/go/src/compress下以避免冲突。
但是其实不行,因为有依赖关系,即使把整个compress目录复制过去也不行。报错如下
../go/bin/go build -o zstdutil zstdutil.go
../go/src/compress/zstd/enc_best.go:11:2: no required module provides package github.com/klauspost/compress: go.mod file not found in current directory or any parent directory; see 'go help modules'
../go/src/compress/zstd/blockdec.go:14:2: no required module provides package github.com/klauspost/compress/huff0: go.mod file not found in current directory or any parent directory; see 'go help modules'
..
cp compress /par/go/src/compress2 -R
../go/bin/go build -o zstdutil zstdutil.go
../go/src/compress2/zstd/enc_best.go:11:2: no required module provides package github.com/klauspost/compress: go.mod file not found in current directory or any parent directory; see 'go help modules'
此路不通,只好用DeepSeek介绍的另一种方法:不修改自己程序的导入路径,而是使用go.mod文件中的replace指令来替换目录。把zstd子目录下的go.mod复制到自己程序目录下,直接使用会报错:
../go/bin/go build -o zstdutil zstdutil.go
zstdutil.go:10:2: github.com/klauspost/compress@v1.15.15 (replaced by ../..): reading /go.mod: open /go.mod: no such file or directory
把最后一行改为:
replace github.com/klauspost/compress => ./compress
以引用当前目录下的compress目录。改后完整内容如下,也不知其他行有用没有
module github.com/klauspost/compress/s2/_generate

go 1.22.0

toolchain go1.22.4

require (
	github.com/klauspost/compress v1.15.15
	github.com/mmcloughlin/avo v0.6.0
)

require (
	golang.org/x/mod v0.21.0 // indirect
	golang.org/x/sync v0.8.0 // indirect
	golang.org/x/tools v0.25.0 // indirect
)

replace github.com/klauspost/compress => ./compress
编译成功,测试通过,只是默认级别(3)几乎没有压缩效果(输出的v3.txt.zst为55968197字节,比原文件的55966906字节还略大),改为-level 9就正常了(46459418字节)。
../go/bin/go build -o zstdutil zstdutil.go
./zstdutil
Usage: ./zstdutil [OPTIONS] INPUT [OUTPUT]
Options:
  -d    decompress
  -f    force overwrite
  -level int
        compression level (1-19) (default 3)
  -o string
        output file

Examples:
 Compress: zstdutil -level 5 file.txt
 Decompress: zstdutil -d file.txt.zst
./zstdutil v.txt -o v2.txt.zst
Error: output file v.txt.zst already exists (use -f to overwrite)
mv v.txt v2.txt
./zstdutil v2.txt
mv v2.txt.zst v3.txt.zst
./zstdutil d v3.txt.zst
Error: open d: no such file or directory
./zstdutil -d v3.txt.zst
ls -l
total 271120
drwxr-xr-x 18 root root 4096 Aug 7 14:49 compress
-rw-rw-r-- 1 1000 1000 351 Aug 7 15:41 go.mod
-rwxr-xr-x 1 root root 3825360 Aug 7 14:35 gozip
-rw-rw-r-- 1 1000 1000 2131 Aug 7 14:35 gozip.go
drwxr-xr-x 5 root root 4096 Aug 7 14:08 gozstd
-rw-r--r-- 1 root root 46459422 Aug 7 14:37 v.txt.zst
-rw-r--r-- 1 root root 55966906 Aug 7 14:40 v2.txt
-rw-r--r-- 1 root root 55966906 Aug 7 15:46 v3.txt
-rw-r--r-- 1 root root 55968197 Aug 7 15:45 v3.txt.zst
-rw-r--r-- 1 root root 55966906 Aug 7 14:37 varchar.txt
-rwxr-xr-x 1 root root 3441123 Aug 7 15:41 zstdutil
-rw-rw-r-- 1 1000 1000 2737 Aug 7 15:40 zstdutil.go
rm v3.*
./zstdutil -level 9 v2.txt
ls -l v2*
-rw-r--r-- 1 root root 55966906 Aug 7 14:40 v2.txt
-rw-r--r-- 1 root root 46459418 Aug 7 16:00 v2.txt.zst
55MB的文件测不出性能,用tpch 的2GB大小的lineitem文件测试结果如下
#使用带GLIBC低版本的环境
#zstd官方二进制文件
time zstd lineitem.csv
lineitem.csv : 30.66% ( 2.18 GiB => 685 MiB, lineitem.csv.zst)

real 0m17.287s
user 0m26.980s
sys 0m2.028s
ls -l lineitem.csv.zst
-rw-r--r-- 1 kylin kylin 718508544 7月 17 10:30 lineitem.csv.zst
rm lineitem.csv.zst
time gozstd/gozip lineitem.csv
gozstd/gozip: /lib/aarch64-linux-gnu/libc.so.6: version `GLIBC_2.32' not found (required by gozstd/gozip)
gozstd/gozip: /lib/aarch64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by gozstd/gozip)

real 0m0.026s
user 0m0.000s
sys 0m0.000s
time gozstd/zstdutil lineitem.csv

real 0m33.128s
user 0m34.316s
sys 0m1.992s
ls -l lineitem.csv.zst
-rw-rw-r-- 1 kylin kylin 726424595 8月 7 16:42 lineitem.csv.zst
#使用带GLIBC高版本的环境
rm lineitem.csv.zst
time gozstd/gozip lineitem.csv

real 1m2.271s
user 0m34.496s
sys 0m19.860s
ls -l lineitem.csv.zst
-rw-r--r-- 1 root root 719013634 Aug 7 16:46 lineitem.csv.zst
工具 | czstd(官方zstd) | 纯go zstd(klauspost,zstdutil) | go调用czstd(gozstd,gozip) |
---|---|---|---|
压缩后大小(MB) | 718 | 726 | 719 |
压缩时间(秒,real) | 17 | 33 | 62 |