From bd20e7d29a47f82b7690297ff6ec67c76f1cafc0 Mon Sep 17 00:00:00 2001 From: Mark Joshwel Date: Mon, 20 Jan 2025 04:52:09 +0800 Subject: [PATCH] meta: add files --- .gitignore | 1 + .idea/.gitignore | 8 +++ .idea/encodings.xml | 4 ++ .idea/modules.xml | 8 +++ .idea/sidestepper.iml | 11 ++++ .idea/vcs.xml | 6 +++ Cargo.lock | 7 +++ Cargo.toml | 6 +++ LICENCE | 14 +++++ README.md | 65 +++++++++++++++++++++++ src/main.rs | 121 ++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 251 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/encodings.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/sidestepper.iml create mode 100644 .idea/vcs.xml create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENCE create mode 100644 README.md create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..7e5b7d7 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..df87cf9 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..4a9f1d8 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/sidestepper.iml b/.idea/sidestepper.iml new file mode 100644 index 0000000..cf84ae4 --- /dev/null +++ b/.idea/sidestepper.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 
index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..108bcdb --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "sidestepper" +version = "5.0.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a9ee9aa --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "sidestepper" +version = "5.0.0" +edition = "2021" + +[dependencies] diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..1a3cd04 --- /dev/null +++ b/LICENCE @@ -0,0 +1,14 @@ +BSD Zero Clause License + +Copyright (c) 2025 mark joshwel + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..30125c8 --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# sota staircase SideStepper + +a fast .gitignore-respecting large file finder for .git repositories trying to +weed out large LFS files + +**this is brain made software**: large language-based code generation has not been +directly used here. 
but i'd be lying if i said i didn't ask chatgpt if there +was a better way to check a boolean result lol + +## quickstart + +### installing a binary + +**note:** all non-windows builds are statically linked + +- Windows +- Linux +- macOS universal +- macOS amd64 +- macOS aarch64 + +(also available in the 'releases' tab wherever this repository is situated) + +### build it yourself + +1. [get rust and cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html#install-rust-and-cargo) +2. `cargo build --release` + +**nix users, rejoice:** `nix run github:markjoshwel/sidestepper` or `nix run git+https://forge.joshwel.co/mark/sidestepper` + +### running it + +```text +./sidestepper +``` + +or on windows, + +```text +./sidestepper.exe +``` + +it'll look for a `.git` directory in the current or parent directories. if you +want to use this outside of the context i usually use it in, pass in +`--search-here` to treat the current working directory as the 'repository root' + +it'll then make a `.sotaignore` file that i use in my other tooling, +but if you want output more friendly for integration in other places, +pass in `--plumbing` for it to output encountered large files, line-by-line, to +stdout + +## historical changes + +- v5 (i3/a4) - rewritten in rust lol +- v4 (i2/a4) - optimised single iod-ttt +- v3 (i2/a3) - faster matching by remembering ignored directories (ignore on demand, 'iod') +- v2 (i2/a2) - corrected ignored directory matching (named 'trytrytry') +- v1 (i1/a1) - original python script, still embedded within ReStepper + +## licence + +with all my heart, copyright (c) 2025 mark joshwel + +the sota staircase SideStepper is permissively licenced, not needing +attribution, under the [0BSD licence](LICENCE). go ham. 
// sota staircase SideStepper
// a fast .gitignore-respecting large file finder
//
// Copyright (c) 2025 mark joshwel
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
// IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use std::env;
use std::error::Error;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::process;
use std::str::FromStr;

/// Fallback for [`Behaviour::chunk_size`] when `SOTA_SIDESTEP_CHUNK_SIZE` is
/// unset or cannot be parsed as a `u16`.
const SOTA_SIDESTEP_CHUNK_SIZE: u16 = 16;

/// Fallback for [`Behaviour::max_workers`] when `SOTA_SIDESTEP_MAX_WORKERS` is
/// unset or cannot be parsed as a `u16`.
const SOTA_SIDESTEP_MAX_WORKERS: u16 = 4;

/// Fallback for [`Behaviour::large_file_size`] when
/// `SOTA_SIDESTEP_LARGE_FILE_SIZE` is unset or cannot be parsed as a `u64`.
const SOTA_SIDESTEP_LARGE_FILE_SIZE: u64 = 100_000_000; // 100mb

/// Runtime configuration gathered from the environment, the command line and
/// the working directory by [`cli_get_behaviour`].
#[derive(Debug)]
struct Behaviour {
    /// Nearest directory (the working directory or one of its parents) that
    /// contains a `.git` entry.
    repo_dir_path: PathBuf,
    /// `<repo_dir_path>/.sotaignore`; the file is not required to exist.
    repo_sotaignore_path: PathBuf,
    /// `true` when `SOTA_SIDESTEP_PARALLEL` is set or `--parallel` was passed.
    parallel: bool,
    /// Value of `SOTA_SIDESTEP_CHUNK_SIZE`, or its default.
    chunk_size: u16,
    /// Value of `SOTA_SIDESTEP_MAX_WORKERS`, or its default.
    max_workers: u16,
    /// Value of `SOTA_SIDESTEP_LARGE_FILE_SIZE`, or its default.
    large_file_size: u64,
}

/// Parses `raw` as a `T`, returning `fallback` when it is absent or malformed.
fn parse_or<T: FromStr>(raw: Option<String>, fallback: T) -> T {
    raw.and_then(|text| text.parse().ok()).unwrap_or(fallback)
}

/// Reads the environment variable `name` as a `T`, returning `fallback` when
/// it is unset, not valid Unicode, or fails to parse.
fn env_or<T: FromStr>(name: &str, fallback: T) -> T {
    parse_or(env::var(name).ok(), fallback)
}

/// Decides whether parallel mode was requested: either the environment flag
/// is set, or one of `args` is exactly `--parallel`.
fn wants_parallel<I, S>(env_flag_set: bool, args: I) -> bool
where
    I: IntoIterator<Item = S>,
    S: AsRef<OsStr>,
{
    env_flag_set
        || args
            .into_iter()
            .any(|arg| arg.as_ref() == OsStr::new("--parallel"))
}

/// Walks from `start` up through its parents and returns the first directory
/// that contains a `.git` entry, or `None` if there is none.
fn find_repo_root(start: &Path) -> Option<&Path> {
    start
        .ancestors()
        // NOTE(review): kept from the original loop condition -- a path with a
        // single component (e.g. `/` on unix) is never inspected. confirm
        // whether skipping the filesystem root is intentional.
        .take_while(|dir| dir.components().count() > 1)
        // an unreadable directory (Err) is treated the same as "no .git here"
        .find(|dir| matches!(dir.join(".git/").try_exists(), Ok(true)))
}

/// Builds the program's [`Behaviour`] from environment variables, command-line
/// arguments and the current working directory.
///
/// # Errors
///
/// Returns an error when the current working directory cannot be determined,
/// or when neither it nor any of its parents contains a `.git` entry.
fn cli_get_behaviour() -> Result<Behaviour, Box<dyn Error>> {
    // get environment variables
    let chunk_size = env_or("SOTA_SIDESTEP_CHUNK_SIZE", SOTA_SIDESTEP_CHUNK_SIZE);
    let max_workers = env_or("SOTA_SIDESTEP_MAX_WORKERS", SOTA_SIDESTEP_MAX_WORKERS);
    let large_file_size = env_or(
        "SOTA_SIDESTEP_LARGE_FILE_SIZE",
        SOTA_SIDESTEP_LARGE_FILE_SIZE,
    );

    // `args_os` rather than `args`: the latter panics on non-unicode arguments
    let parallel = wants_parallel(
        env::var("SOTA_SIDESTEP_PARALLEL").is_ok(),
        env::args_os(),
    );

    // find repo dir
    // go through each parent dir until one of them has a .git directory in it
    let current_dir = env::current_dir()
        .map_err(|e| format!("could not determine the current directory: {}", e))?;
    let repo_dir_path = find_repo_root(&current_dir)
        .ok_or("could not find a .git repository in the current or parent directories")?;

    Ok(Behaviour {
        repo_sotaignore_path: repo_dir_path.join(".sotaignore"),
        repo_dir_path: repo_dir_path.to_path_buf(),
        parallel,
        chunk_size,
        max_workers,
        large_file_size,
    })
}

/// Prints the banner and the resolved configuration to stderr, exiting with
/// status 1 if the configuration could not be determined.
fn main() {
    eprintln!("sota staircase SideStepper v5 (i3/a4)");
    let behaviour = match cli_get_behaviour() {
        Ok(behaviour) => behaviour,
        Err(e) => {
            eprintln!("critical error: {}", e);
            process::exit(1);
        }
    };
    let sotaignore_state = if matches!(behaviour.repo_sotaignore_path.try_exists(), Ok(true)) {
        "exists"
    } else {
        "non-existent"
    };
    // `display()` instead of `to_str().unwrap()` so non-unicode paths can't panic
    eprintln!(
        " repo root : {}\n .sotaignore : {} ({})\n parallel : {}",
        behaviour.repo_dir_path.display(),
        behaviour.repo_sotaignore_path.display(),
        sotaignore_state,
        behaviour.parallel
    );
}