meta: add files

This commit is contained in:
Mark Joshwel 2025-01-20 04:52:09 +08:00
commit bd20e7d29a
11 changed files with 251 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

8
.idea/.gitignore generated vendored Normal file
View file

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

4
.idea/encodings.xml generated Normal file
View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" addBOMForNewFiles="with BOM under Windows, with no BOM otherwise" />
</project>

8
.idea/modules.xml generated Normal file
View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/sidestepper.iml" filepath="$PROJECT_DIR$/.idea/sidestepper.iml" />
</modules>
</component>
</project>

11
.idea/sidestepper.iml generated Normal file
View file

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="EMPTY_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml generated Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

7
Cargo.lock generated Normal file
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "sidestepper"
version = "5.0.0"

6
Cargo.toml Normal file
View file

@ -0,0 +1,6 @@
[package]
name = "sidestepper"
version = "5.0.0"
edition = "2021"
[dependencies]

14
LICENCE Normal file
View file

@ -0,0 +1,14 @@
BSD Zero Clause License
Copyright (c) 2025 mark joshwel <mark@joshwel.co>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

65
README.md Normal file
View file

@ -0,0 +1,65 @@
# sota staircase SideStepper
a fast .gitignore-respecting large file finder for .git repositories trying to
weed out large LFS files
**this is brain made software**: large language model-based code generation has
not been directly used here. but i'd be lying if i said i didn't ask chatgpt if
there was a better way to check a boolean result lol
## quickstart
### installing a binary
**note:** all non-windows builds are statically linked
- Windows
- Linux
- macOS universal
- macOS amd64
- macOS aarch64
(also available in the 'releases' tab of wherever this repository is hosted)
### build it yourself
1. [get rust and cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html#install-rust-and-cargo)
2. `cargo build --release`
**nix users, rejoice:** `nix run github:markjoshwel/sidestepper` or `nix run git+https://forge.joshwel.co/mark/sidestepper`
### running it
```text
./sidestepper
```
or on windows,
```text
./sidestepper.exe
```
it'll look for a `.git` directory in the current or parent directories. if you
want to use this outside of the context i usually use it in, pass in
`--search-here` to treat the current working directory as the 'repository root'
it'll then make a `.sotaignore` file that i use in my other tooling,
but if you want output more friendly for integration in other places,
pass in `--plumbing` for it to output encountered large files, line-by-line, to
stdout
## historical changes
- v5 (i3/a4) - rewritten in rust lol
- v4 (i2/a4) - optimised single iod-ttt
- v3 (i2/a3) - faster matching by remembering ignored directories (ignore on demand, 'iod')
- v2 (i2/a2) - corrected ignored directory matching (named 'trytrytry')
- v1 (i1/a1) - original python script, still embedded within ReStepper
## licence
with all my heart, copyright (c) 2025 mark joshwel
the sota staircase SideStepper is permissively licenced, not needing
attribution, under the [0BSD licence](LICENCE). go ham.

121
src/main.rs Normal file
View file

@ -0,0 +1,121 @@
// sota staircase SideStepper
// a fast .gitignore-respecting large file finder
//
// Copyright (c) 2025 mark joshwel <mark@joshwel.co>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
// IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
use std::env;
use std::error::Error;
use std::path::{Path, PathBuf};
/// Fallback chunk size, used when the `SOTA_SIDESTEP_CHUNK_SIZE` environment
/// variable is unset or cannot be parsed as a `u16`.
const SOTA_SIDESTEP_CHUNK_SIZE: u16 = 16;

/// Fallback worker count, used when the `SOTA_SIDESTEP_MAX_WORKERS`
/// environment variable is unset or cannot be parsed as a `u16`.
const SOTA_SIDESTEP_MAX_WORKERS: u16 = 4;

/// Fallback "large file" threshold (100mb), used when the
/// `SOTA_SIDESTEP_LARGE_FILE_SIZE` environment variable is unset or cannot be
/// parsed as a `u64`.
const SOTA_SIDESTEP_LARGE_FILE_SIZE: u64 = 100_000_000;
/// Resolved runtime configuration, assembled once at startup from the
/// environment, the command line and the working directory.
#[derive(Debug)]
struct Behaviour {
    /// Directory treated as the repository root (the one containing `.git`).
    repo_dir_path: PathBuf,
    /// Location of the `.sotaignore` file inside the repository root.
    repo_sotaignore_path: PathBuf,
    /// Whether parallel mode was requested (`SOTA_SIDESTEP_PARALLEL` or `--parallel`).
    parallel: bool,
    /// Value of `SOTA_SIDESTEP_CHUNK_SIZE`, or its built-in default.
    chunk_size: u16,
    /// Value of `SOTA_SIDESTEP_MAX_WORKERS`, or its built-in default.
    max_workers: u16,
    /// Value of `SOTA_SIDESTEP_LARGE_FILE_SIZE`, or its built-in default.
    large_file_size: u64,
}
/// Reads the environment variable `name` and parses it as `T`.
///
/// Returns `default` when the variable is unset, is not valid Unicode, or
/// does not parse as `T`.
fn env_parse_or<T: std::str::FromStr>(name: &str, default: T) -> T {
    env::var(name)
        .ok()
        .and_then(|raw| raw.parse::<T>().ok())
        .unwrap_or(default)
}

/// Builds the runtime [`Behaviour`] from environment variables, command-line
/// arguments and the current working directory.
///
/// * `SOTA_SIDESTEP_CHUNK_SIZE`, `SOTA_SIDESTEP_MAX_WORKERS` and
///   `SOTA_SIDESTEP_LARGE_FILE_SIZE` override the built-in defaults when they
///   parse successfully; otherwise the defaults are used silently.
/// * Parallel mode is enabled when `SOTA_SIDESTEP_PARALLEL` is set (to any
///   Unicode value) or when `--parallel` is passed on the command line.
/// * The repository root is the nearest directory, starting at the current
///   working directory and walking up through every ancestor (filesystem
///   root included), that contains a `.git` directory.
///
/// # Errors
///
/// Returns an error when the current working directory cannot be determined,
/// or when no directory on the way up contains a `.git` directory.
fn cli_get_behaviour() -> Result<Behaviour, Box<dyn Error>> {
    // get environment variables
    let chunk_size: u16 = env_parse_or("SOTA_SIDESTEP_CHUNK_SIZE", SOTA_SIDESTEP_CHUNK_SIZE);
    let max_workers: u16 = env_parse_or("SOTA_SIDESTEP_MAX_WORKERS", SOTA_SIDESTEP_MAX_WORKERS);
    let large_file_size: u64 = env_parse_or(
        "SOTA_SIDESTEP_LARGE_FILE_SIZE",
        SOTA_SIDESTEP_LARGE_FILE_SIZE,
    );

    // `args_os` rather than `args`: the latter panics as soon as any argument
    // is not valid Unicode, even one we do not care about.
    let parallel: bool = env::var("SOTA_SIDESTEP_PARALLEL").is_ok()
        || env::args_os().any(|arg| arg == "--parallel");

    // find repo dir
    // a missing/inaccessible working directory is reported, not panicked on
    let current_dir = env::current_dir()
        .map_err(|e| format!("could not determine the current working directory: {e}"))?;

    // go through the current dir and each parent dir until one of them has a
    // .git directory in it; `ancestors` also visits the filesystem root, so
    // the search behaves the same on every platform
    let repo_dir_path: &Path = current_dir
        .ancestors()
        .find(|dir| matches!(dir.join(".git/").try_exists(), Ok(true)))
        .ok_or("could not find a .git repository in the current or parent directories")?;

    Ok(Behaviour {
        repo_dir_path: PathBuf::from(repo_dir_path),
        repo_sotaignore_path: repo_dir_path.join(".sotaignore"),
        parallel,
        chunk_size,
        max_workers,
        large_file_size,
    })
}
/// Program entry point.
///
/// Prints the version banner, resolves the runtime behaviour via
/// `cli_get_behaviour`, and reports the repository root, the `.sotaignore`
/// path (and whether it exists) and the parallel flag. All output goes to
/// stderr. Exits with status 1 if the behaviour cannot be resolved.
fn main() {
    eprintln!("sota staircase SideStepper v5 (i3/a4)");

    // matching on the owned result avoids the borrow-then-unwrap dance
    let behaviour = match cli_get_behaviour() {
        Ok(behaviour) => behaviour,
        Err(e) => {
            eprintln!("critical error: {}", e);
            std::process::exit(1);
        }
    };

    // an I/O error while probing is treated the same as "not there"
    let sotaignore_state = if matches!(behaviour.repo_sotaignore_path.try_exists(), Ok(true)) {
        "exists"
    } else {
        "non-existent"
    };

    // `display()` instead of `to_str().unwrap()`: paths are not guaranteed to
    // be valid UTF-8, and a status line must never panic over that
    eprintln!(
        " repo root : {}\n .sotaignore : {} ({})\n parallel : {}",
        behaviour.repo_dir_path.display(),
        behaviour.repo_sotaignore_path.display(),
        sotaignore_state,
        behaviour.parallel
    );
}