I have not been able to find out whether this is a truly reliable approach, but so far my testing seems to confirm that it is.
We ended up using the following script as part of our build pipeline:
# See https://mcmap.net/q/1564890/-is-comparing-git-lfs-ls-files-with-git-ls-files-39-attr-filter-lfs-39-a-reliable-way-to-detect-lfs-files-that-are-not-managed-by-lfs
#
# Script parameters: the caller must pass the local path of the repository to inspect.
[CmdletBinding()]
[OutputType([int])]
Param (
    [Parameter(Mandatory = $true)] [string] $RepositoryLocalPath
)

# Turn every error into a terminating one so that failures surface as exceptions.
$ErrorActionPreference = 'Stop';
# Throws when the most recently run native command finished with a non-zero exit code.
# Takes no parameters and produces no output; it only reads the automatic $LASTEXITCODE variable.
# Throws: System.InvalidOperationException whose message carries the offending exit code.
function Assert-LastExitCodeIs0
{
    if ($LASTEXITCODE -ne 0)
    {
        # The exception type name must be spelled correctly, otherwise New-Object itself fails
        # and the exit code never makes it into the reported error.
        throw (New-Object -TypeName System.InvalidOperationException -ArgumentList "LASTEXITCODE is $LASTEXITCODE");
    }
}
# Decodes a double-quoted path that contains octal byte escapes (e.g. "\320\273.txt")
# into a plain string, interpreting the escaped bytes as UTF-8.
#
# Parameters:
#   String - the text to decode.
# Returns:
#   The input unchanged when it is not wrapped in a pair of double quotes; otherwise the text
#   between the quotes with every run of octal escapes replaced by the decoded characters.
#   Only octal escapes are decoded; any other backslash sequence is left as it is.
function Get-StringFromOctal
{
    [CmdLetBinding()]
    [OutputType([string])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $String
    )
    # Too short to be a quoted string: nothing to decode.
    if ([String]::IsNullOrEmpty($String) -or ($String.Length -lt 2))
    {
        return $String;
    }
    # Not wrapped in double quotes: treat it as a verbatim path.
    if (($String[0] -ne '"') -or
        ($String[$String.Length - 1] -ne '"'))
    {
        return $String;
    }
    # Strip the surrounding quotes.
    $String = $String.Substring(1, $String.Length - 2);
    # An escape is a backslash followed by EXACTLY three octal digits (000-377).
    # A greedy "any number of digits" pattern would also swallow literal digits that follow
    # an escape (e.g. the "1" in "\2731.txt") and overflow the byte conversion.
    # Consecutive escapes are matched as one run so multi-byte UTF-8 sequences decode together.
    [regex] $regex = '(\\[0-3][0-7]{2})+';
    $encoding = [System.Text.Encoding]::UTF8;
    return $regex.Replace(
        $String,
        {
            Param ($Capture)
            # Each capture of group 1 is one "\ooo" escape; drop the backslash and parse base 8.
            [byte[]] $bytes = @($Capture.Groups[1].Captures |
                ForEach-Object { [System.Convert]::ToByte($_.Value.Substring(1), 8) });
            return $encoding.GetString($bytes);
        });
}
# Turns a path as printed by git ls-files / git lfs ls-files into a plain path string.
#
# Parameters:
#   Path - one path taken from the command output.
# Returns:
#   The path with surrounding whitespace removed and octal escapes decoded.
function Normalize-GitLsFilesPath
{
    [CmdLetBinding()]
    [OutputType([string])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    # Why the octal-escaped paths are parsed here instead of being avoided:
    #  - "ls-files -z" would help for git itself, but "lfs ls-files" has no such option.
    #  - Setting core.quotePath to 0 brings a slew of encoding problems, and it would mean
    #    changing git config settings, which is undesirable even if reverting always(?) succeeds.
    # So it seems simpler just to parse the octal paths.
    $trimmedPath = $Path.Trim();
    return (Get-StringFromOctal -String $trimmedPath);
}
# Tells whether the file at the given path has a length of zero.
#
# Parameters:
#   Path - literal (non-wildcard) path of the file to check.
# Returns:
#   $true when the item's Length is 0, otherwise $false.
function Is-FileEmpty
{
    [CmdLetBinding()]
    [OutputType([bool])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    $fileItem = Get-Item -LiteralPath $Path;
    $fileLength = $fileItem.Length;
    return ($fileLength -eq 0);
}
# Predicate used to drop empty files from the list of suspected unmanaged lfs files.
#
# Parameters:
#   Path - literal path of the file to check.
# Returns:
#   $true when the file is NOT empty, $false when it is empty.
function Is-NotEmptyLfsFile
{
    [CmdLetBinding()]
    [OutputType([bool])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    # An empty file stays an empty file whether or not it is under lfs
    # (https://github.com/git-lfs/git-lfs/issues/2863), and such files do not seem to get
    # the lfs attributes, so the only option is to filter them out.
    $isEmpty = Is-FileEmpty -Path $Path;
    return (-not $isEmpty);
}
# Lists the paths of the files that git lfs reports as managed by lfs.
#
# Runs "git lfs ls-files" in the current working directory and extracts the path column.
# Returns:
#   A string array of normalized paths; an empty array when the command prints nothing.
# Throws:
#   Whatever Assert-LastExitCodeIs0 throws when the git command exits with a non-zero code.
function Get-ActualLfsFilePaths
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    Write-Host 'Getting actual lfs file paths.';
    # @() guarantees an array even when the command prints nothing or a single line.
    $lfsLsFilesOutput = @(git lfs ls-files);
    Assert-LastExitCodeIs0;
    # Wrapping the pipeline in @() keeps the result an (possibly empty) array, so the
    # "return , ..." below never hands a null to the caller when there are no lfs files.
    [string[]] $actualLfsFilePaths = @($lfsLsFilesOutput |
        ForEach-Object {
            #'12345678 * my.dll' - not all lfs versions support -n flag, so it is better to be conservative
            # Split into at most 3 parts so spaces inside the path itself are preserved.
            $lsFilePath = $_.Split(' ', 3)[2];
            Normalize-GitLsFilesPath -Path $lsFilePath;
        });
    Write-Host "There are $($actualLfsFilePaths.Count) files that are actually under lfs";
    # The unary comma stops PowerShell from unrolling the array on output.
    return , $actualLfsFilePaths;
}
# Lists the paths of the files that carry the "filter=lfs" attribute, i.e. files that
# should be managed by lfs. Symlink entries (mode 120000) are skipped.
#
# Runs "git ls-files -s ':(attr:filter=lfs)'" in the current working directory.
# Returns:
#   A string array of normalized paths; an empty array when the command prints nothing.
# Throws:
#   Whatever Assert-LastExitCodeIs0 throws when the git command exits with a non-zero code.
function Get-ShouldBeLfsFilePaths
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    Write-Host 'Getting should be under lfs file paths.';
    # @() guarantees an array even when the command prints nothing; piping a null list
    # into ForEach-Object would otherwise invoke the script block once with a null item.
    [string[]] $lsFilesOutput = @(git ls-files -s ':(attr:filter=lfs)');
    Assert-LastExitCodeIs0;
    $symlinkBitMaskString = '120000';
    [string[]] $shouldBeUnderLfsFilePaths = @($lsFilesOutput |
        ForEach-Object {
            #120000 1234567890abcdef 0 mylib.dylib
            # Split on whitespace into mode, object id, stage and path; the unary comma
            # passes the 4 fields down the pipeline as one item instead of unrolling them.
            , $_.Split($null, 4);
        } |
        Where-Object {
            $fileChmodString = $_[0];
            $fileChmodString -ne $symlinkBitMaskString; # Perhaps we should check actual bitmask?
        } |
        ForEach-Object {
            Normalize-GitLsFilesPath -Path $_[3];
        });
    Write-Host "There are $($shouldBeUnderLfsFilePaths.Count) files that should be under lfs (may include flukes - empty files)";
    # The unary comma stops PowerShell from unrolling the array on output.
    return , $shouldBeUnderLfsFilePaths;
}
# Computes the files that should be under lfs but are not.
#
# Takes the paths that carry the lfs filter attribute, removes the ones lfs actually
# manages, and drops the files that are empty on disk.
# Returns:
#   A string array with the offending paths (empty when there are none).
function Get-LfsFilePathsNotUnderLfs
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    [string[]] $actualLfsFilePaths = Get-ActualLfsFilePaths;
    [string[]] $shouldBeUnderLfsFilePaths = Get-ShouldBeLfsFilePaths;
    # Enumerable.Except throws ArgumentNullException when either sequence is null,
    # so treat a missing list as an empty one.
    if ($null -eq $actualLfsFilePaths)
    {
        $actualLfsFilePaths = @();
    }
    if ($null -eq $shouldBeUnderLfsFilePaths)
    {
        $shouldBeUnderLfsFilePaths = @();
    }
    Add-Type -AssemblyName 'System.Linq';
    Write-Host 'Comparing actual lfs and should be under lfs files.'
    # Set difference first, then keep only the non-empty files.
    $missingFromLfs = [System.Linq.Enumerable]::Except($shouldBeUnderLfsFilePaths, $actualLfsFilePaths);
    $nonEmptyMissingFromLfs = [System.Linq.Enumerable]::Where(
        $missingFromLfs,
        [Func[String, Boolean]] ${function:Is-NotEmptyLfsFile});
    # The unary comma stops PowerShell from unrolling the array on output.
    return , [System.Linq.Enumerable]::ToArray($nonEmptyMissingFromLfs);
}
# Runs the check against the current working directory and reports the result on the host.
# Returns:
#   0 when no offending files were found, 1 otherwise.
function Main-WithRepositoryAsWorkingFolder
{
    $offendingPaths = Get-LfsFilePathsNotUnderLfs;
    $offendingCount = $offendingPaths.Count;
    Write-Host "There are $($offendingCount) lfs files that are not under lfs";
    # List every offending path, tab-indented and quoted.
    foreach ($offendingPath in $offendingPaths)
    {
        Write-Host "`t'$offendingPath'";
    }
    if ($offendingCount -ne 0)
    {
        return 1;
    }
    return 0;
}
# Entry point: switches into the repository, forces UTF-8 encodings, runs the check and
# restores the previous location and encodings afterwards.
#
# Parameters:
#   RepositoryLocalPath - local path of the repository to inspect.
# Returns:
#   The result of Main-WithRepositoryAsWorkingFolder, or 2 when any error occurred
#   (including a failure while switching directory or encodings).
function Main
{
    [CmdletBinding()]
    [OutputType([int])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $RepositoryLocalPath
    )
    Write-Host "RepositoryLocalPath = $RepositoryLocalPath";
    # Capture the current state first (reads only), so that the finally block can always
    # restore it, no matter which of the changes below fails.
    $originalGetLocation = (Get-Location).Path;
    $originalCurrentDirectory = [System.IO.Directory]::GetCurrentDirectory();
    $originalOutputEncoding = $OutputEncoding;
    $originalConsoleOutputEncoding = [Console]::OutputEncoding;
    $originalConsoleInputEncoding = [Console]::InputEncoding;
    try
    {
        # It is simpler to set working dir rather than use Process.Start with WorkingDirectory everywhere, and more reliable than Start-Process (that one may in some cases have issues with return exit code).
        Set-Location -LiteralPath $RepositoryLocalPath;
        [System.IO.Directory]::SetCurrentDirectory($RepositoryLocalPath);
        # Otherwise we won't get proper file paths from git lfs ls-files, as it doesn't use octal encoding
        # And using output redirection may lead us to even bigger problems|difficulties.
        $OutputEncoding = [System.Text.Encoding]::UTF8;
        [Console]::OutputEncoding = [System.Text.Encoding]::UTF8;
        [Console]::InputEncoding = [System.Text.Encoding]::UTF8;
        return Main-WithRepositoryAsWorkingFolder;
    }
    catch
    {
        Write-Host "$_ $($_.ScriptStackTrace)";
        return 2;
    }
    finally
    {
        Set-Location -LiteralPath $originalGetLocation;
        [System.IO.Directory]::SetCurrentDirectory($originalCurrentDirectory);
        $OutputEncoding = $originalOutputEncoding;
        [Console]::OutputEncoding = $originalConsoleOutputEncoding;
        [Console]::InputEncoding = $originalConsoleInputEncoding;
    }
}
# Run the check and use its result as the exit code of the script.
$scriptExitCode = Main -RepositoryLocalPath $RepositoryLocalPath;
exit $scriptExitCode;