diff --git a/Source/Editor/Content/Import/TextureImportEntry.cs b/Source/Editor/Content/Import/TextureImportEntry.cs
index 9842485c1..e7a007fb2 100644
--- a/Source/Editor/Content/Import/TextureImportEntry.cs
+++ b/Source/Editor/Content/Import/TextureImportEntry.cs
@@ -58,10 +58,10 @@ namespace FlaxEngine.Tools
FieldInfo[] fields = typeof(CustomMaxSizes).GetFields();
for (int i = 0; i < fields.Length; i++)
{
- var field = fields[i];
- if (field.Name.Equals("value__"))
+ var @field = fields[i];
+ if (@field.Name.Equals("value__"))
continue;
- if (value == (int)field.GetRawConstantValue())
+ if (value == (int)@field.GetRawConstantValue())
return (CustomMaxSizes)value;
}
return CustomMaxSizes._8192;
diff --git a/Source/Editor/Content/Items/VisualScriptItem.cs b/Source/Editor/Content/Items/VisualScriptItem.cs
index b99125f9b..54133e6e2 100644
--- a/Source/Editor/Content/Items/VisualScriptItem.cs
+++ b/Source/Editor/Content/Items/VisualScriptItem.cs
@@ -100,12 +100,16 @@ namespace FlaxEditor.Content
///
public object GetValue(object obj)
{
+ if (!_type.Asset)
+ throw new TargetException("Missing Visual Script asset.");
return _type.Asset.GetScriptInstanceParameterValue(_parameter.Name, (Object)obj);
}
///
public void SetValue(object obj, object value)
{
+ if (!_type.Asset)
+ throw new TargetException("Missing Visual Script asset.");
_type.Asset.SetScriptInstanceParameterValue(_parameter.Name, (Object)obj, value);
}
}
diff --git a/Source/Editor/Content/Proxy/JsonAssetProxy.cs b/Source/Editor/Content/Proxy/JsonAssetProxy.cs
index 12a54a030..27d0e2347 100644
--- a/Source/Editor/Content/Proxy/JsonAssetProxy.cs
+++ b/Source/Editor/Content/Proxy/JsonAssetProxy.cs
@@ -166,6 +166,25 @@ namespace FlaxEditor.Content
///
public override string Name { get; } = Utilities.Utils.GetPropertyNameUI(typeof(T).Name);
+ private SpriteHandle _thumbnail;
+
+ ///
+ /// Default Constructor.
+ ///
+ public SpawnableJsonAssetProxy()
+ {
+ _thumbnail = SpriteHandle.Invalid;
+ }
+
+ ///
+ /// Constructor with overridden thumbnail.
+ ///
+ /// The thumbnail to use.
+ public SpawnableJsonAssetProxy(SpriteHandle thumbnail)
+ {
+ _thumbnail = thumbnail;
+ }
+
///
public override bool CanCreate(ContentFolder targetLocation)
{
@@ -177,6 +196,12 @@ namespace FlaxEditor.Content
{
Editor.SaveJsonAsset(outputPath, new T());
}
+
+ ///
+ public override AssetItem ConstructItem(string path, string typeName, ref Guid id)
+ {
+ return _thumbnail.IsValid ? new JsonAssetItem(path, id, typeName, _thumbnail) : base.ConstructItem(path, typeName, ref id);
+ }
///
public override string TypeName { get; } = typeof(T).FullName;
diff --git a/Source/Editor/Content/Proxy/PrefabProxy.cs b/Source/Editor/Content/Proxy/PrefabProxy.cs
index c0c4e5c88..d2971f296 100644
--- a/Source/Editor/Content/Proxy/PrefabProxy.cs
+++ b/Source/Editor/Content/Proxy/PrefabProxy.cs
@@ -73,6 +73,16 @@ namespace FlaxEditor.Content
return targetLocation.CanHaveAssets;
}
+ ///
+        public override bool CanReimport(ContentItem item)
+        {
+            if (item is not PrefabItem prefabItem)
+                return base.CanReimport(item);
+            // A failed load or a prefab without a default instance means there is nothing to reimport (no crash)
+            var defaultInstance = FlaxEngine.Content.Load(prefabItem.ID)?.GetDefaultInstance();
+            return defaultInstance != null && defaultInstance.GetScript() != null;
+        }
+
///
public override void Create(string outputPath, object arg)
{
diff --git a/Source/Editor/CustomEditors/CustomEditor.cs b/Source/Editor/CustomEditors/CustomEditor.cs
index ca9bbc390..25b3dea0c 100644
--- a/Source/Editor/CustomEditors/CustomEditor.cs
+++ b/Source/Editor/CustomEditors/CustomEditor.cs
@@ -668,7 +668,7 @@ namespace FlaxEditor.CustomEditors
}
}
- if (obj == null || Values.Type.IsInstanceOfType(obj))
+ if ((obj == null && !Values.Type.IsValueType) || Values.Type.IsInstanceOfType(obj))
{
result = obj;
return true;
@@ -680,20 +680,7 @@ namespace FlaxEditor.CustomEditors
///
/// Gets a value indicating whether can paste value from the system clipboard to the property value container.
///
- public bool CanPaste
- {
- get
- {
- try
- {
- return GetClipboardObject(out _, false);
- }
- catch
- {
- return false;
- }
- }
- }
+ public bool CanPaste => !string.IsNullOrEmpty(Clipboard.Text);
///
/// Sets the value from the system clipboard.
diff --git a/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs b/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs
index 97b016fba..530e8c2b1 100644
--- a/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs
+++ b/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs
@@ -208,7 +208,7 @@ namespace FlaxEditor.CustomEditors.Editors
else
{
// Draw info
- Render2D.DrawText(style.FontMedium, "-", nameRect, isEnabled ? Color.OrangeRed : Color.DarkOrange, TextAlignment.Near, TextAlignment.Center);
+ Render2D.DrawText(style.FontMedium, Type != null ? $"None ({Utilities.Utils.GetPropertyNameUI(Type.ToString())})" : "-", nameRect, isEnabled ? Color.OrangeRed : Color.DarkOrange, TextAlignment.Near, TextAlignment.Center);
}
// Draw picker button
diff --git a/Source/Editor/CustomEditors/Editors/GenericEditor.cs b/Source/Editor/CustomEditors/Editors/GenericEditor.cs
index ec75926d9..f624600b4 100644
--- a/Source/Editor/CustomEditors/Editors/GenericEditor.cs
+++ b/Source/Editor/CustomEditors/Editors/GenericEditor.cs
@@ -474,32 +474,7 @@ namespace FlaxEditor.CustomEditors.Editors
}
if (layout.Editors.Count != 0)
{
- var sb = Clipboard.Text;
- if (!string.IsNullOrEmpty(sb))
- {
- try
- {
- var data = JsonSerializer.Deserialize(sb);
- if (data == null || data.Length != layout.Editors.Count)
- return false;
- for (var i = 0; i < layout.Editors.Count; i++)
- {
- Clipboard.Text = data[i];
- if (!layout.Editors[i].CanPaste)
- return false;
- }
- return true;
- }
- catch
- {
- return false;
- }
- finally
- {
- Clipboard.Text = sb;
- }
- }
- return false;
+ return !string.IsNullOrEmpty(Clipboard.Text);
}
if (layout.Children.Any(x => x is LayoutElementsContainer))
{
diff --git a/Source/Editor/GUI/ColumnDefinition.cs b/Source/Editor/GUI/ColumnDefinition.cs
index 241e6bec3..5c462c903 100644
--- a/Source/Editor/GUI/ColumnDefinition.cs
+++ b/Source/Editor/GUI/ColumnDefinition.cs
@@ -1,6 +1,7 @@
// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
using FlaxEngine;
+using FlaxEngine.GUI;
namespace FlaxEditor.GUI
{
@@ -43,10 +44,20 @@ namespace FlaxEditor.GUI
public Color TitleColor = Color.White;
///
- /// The column title background background.
+ /// The column title background color.
///
public Color TitleBackgroundColor = Color.Brown;
+ ///
+ /// The column title horizontal text alignment.
+ ///
+ public TextAlignment TitleAlignment = TextAlignment.Near;
+
+ ///
+ /// The column title margin.
+ ///
+ public Margin TitleMargin = new Margin(4, 4, 0, 0);
+
///
/// The minimum size (in pixels) of the column.
///
diff --git a/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs b/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs
index b19d7a9f1..9c268c033 100644
--- a/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs
+++ b/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs
@@ -40,6 +40,7 @@ namespace FlaxEditor.GUI.Dialogs
private bool _disableEvents;
private bool _useDynamicEditing;
private bool _activeEyedropper;
+ private bool _canPassLastChangeEvent = true;
private ColorValueBox.ColorPickerEvent _onChanged;
private ColorValueBox.ColorPickerClosedEvent _onClosed;
@@ -380,7 +381,7 @@ namespace FlaxEditor.GUI.Dialogs
{
for (int j = 0; j < numVer; j++)
{
- if ((i + j) % 2 == 0 )
+ if ((i + j) % 2 == 0)
{
var rect = new Rectangle(newRect.X + smallRectSize * i, newRect.Y + smallRectSize * j, new Float2(smallRectSize));
Render2D.FillRectangle(rect, Color.Gray);
@@ -395,7 +396,7 @@ namespace FlaxEditor.GUI.Dialogs
{
// Auto cancel on lost focus
#if !PLATFORM_LINUX
- ((WindowRootControl)Root).Window.LostFocus += OnCancel;
+ ((WindowRootControl)Root).Window.LostFocus += OnWindowLostFocus;
#endif
base.OnShow();
@@ -504,7 +505,7 @@ namespace FlaxEditor.GUI.Dialogs
BackgroundColorHighlighted = savedColor,
BackgroundColorSelected = savedColor.RGBMultiplied(0.8f),
};
- savedColorButton.ButtonClicked += (b) => OnSavedColorButtonClicked(b);
+ savedColorButton.ButtonClicked += OnSavedColorButtonClicked;
_savedColorButtons.Add(savedColorButton);
}
if (_savedColors.Count < 8)
@@ -516,11 +517,24 @@ namespace FlaxEditor.GUI.Dialogs
TooltipText = "Save Color.",
Tag = null,
};
- savedColorButton.ButtonClicked += (b) => OnSavedColorButtonClicked(b);
+ savedColorButton.ButtonClicked += OnSavedColorButtonClicked;
_savedColorButtons.Add(savedColorButton);
}
}
+        private void OnWindowLostFocus()
+        {
+            // When auto-accept is enabled, push the picked color once before the dialog gets cancelled
+            var autoAccept = Editor.Instance.Options.Options.Interface.AutoAcceptColorPickerChange;
+            var hasPendingChange = _useDynamicEditing && _initialValue != _value && _canPassLastChangeEvent;
+            if (hasPendingChange && autoAccept)
+            {
+                _canPassLastChangeEvent = false;
+                _onChanged?.Invoke(_value, false);
+            }
+            OnCancel();
+        }
+
///
public override void OnSubmit()
{
@@ -545,8 +559,9 @@ namespace FlaxEditor.GUI.Dialogs
_disableEvents = true;
// Restore color if modified
- if (_useDynamicEditing && _initialValue != _value)
+ if (_useDynamicEditing && _initialValue != _value && _canPassLastChangeEvent)
{
+ _canPassLastChangeEvent = false;
_onChanged?.Invoke(_initialValue, false);
}
diff --git a/Source/Editor/GUI/Table.cs b/Source/Editor/GUI/Table.cs
index 88c1d9e50..eb7aa3767 100644
--- a/Source/Editor/GUI/Table.cs
+++ b/Source/Editor/GUI/Table.cs
@@ -130,12 +130,14 @@ namespace FlaxEditor.GUI
var style = Style.Current;
var font = column.TitleFont ?? style.FontMedium;
- Render2D.DrawText(font, column.Title, rect, column.TitleColor, TextAlignment.Center, TextAlignment.Center);
+ var textRect = rect;
+ column.TitleMargin.ShrinkRectangle(ref textRect);
+ Render2D.DrawText(font, column.Title, textRect, column.TitleColor, column.TitleAlignment, TextAlignment.Center);
if (columnIndex < _columns.Length - 1)
{
- var splitRect = new Rectangle(rect.Right - 1, 2, 2, rect.Height - 4);
- Render2D.FillRectangle(splitRect, _movingSplit == columnIndex || splitRect.Contains(_mousePos) ? style.BorderNormal : column.TitleBackgroundColor * 0.9f);
+ var splitRect = new Rectangle(rect.Right - 2, 2, 4, rect.Height - 4);
+ Render2D.FillRectangle(splitRect, _movingSplit == columnIndex || splitRect.Contains(_mousePos) ? style.BorderNormal : style.Background * 0.9f);
}
}
@@ -151,7 +153,7 @@ namespace FlaxEditor.GUI
{
rect.Width = GetColumnWidth(i);
- var splitRect = new Rectangle(rect.Right - 1, 2, 2, rect.Height - 4);
+ var splitRect = new Rectangle(rect.Right - 2, 2, 4, rect.Height - 4);
if (splitRect.Contains(location))
{
// Start moving splitter
@@ -193,6 +195,31 @@ namespace FlaxEditor.GUI
PerformLayout();
}
+ else
+ {
+ if (_columns != null && _splits != null)
+ {
+ Rectangle rect = new Rectangle(0, 0, 0, _headerHeight);
+ for (int i = 0; i < _columns.Length - 1; i++)
+ {
+ rect.Width = GetColumnWidth(i);
+
+ var splitRect = new Rectangle(rect.Right - 2, 2, 4, rect.Height - 4);
+ if (splitRect.Contains(location))
+ {
+ // Start moving splitter
+ Cursor = CursorType.SizeWE;
+ break;
+ }
+ else
+ {
+ Cursor = CursorType.Default;
+ }
+
+ rect.X += rect.Width;
+ }
+ }
+ }
base.OnMouseMove(location);
}
diff --git a/Source/Editor/Options/InterfaceOptions.cs b/Source/Editor/Options/InterfaceOptions.cs
index 360d1a7ed..7dec0f5cc 100644
--- a/Source/Editor/Options/InterfaceOptions.cs
+++ b/Source/Editor/Options/InterfaceOptions.cs
@@ -189,6 +189,13 @@ namespace FlaxEditor.Options
[EditorDisplay("Interface"), EditorOrder(280), Tooltip("Editor content window orientation.")]
public FlaxEngine.GUI.Orientation ContentWindowOrientation { get; set; } = FlaxEngine.GUI.Orientation.Horizontal;
+ ///
+ /// If checked, color pickers will always modify the color unless 'Cancel' is pressed, otherwise color won't change unless 'Ok' is pressed.
+ ///
+ [DefaultValue(true)]
+ [EditorDisplay("Interface"), EditorOrder(290)]
+ public bool AutoAcceptColorPickerChange { get; set; } = true;
+
///
/// Gets or sets the formatting option for numeric values in the editor.
///
diff --git a/Source/Editor/Scripting/ScriptType.cs b/Source/Editor/Scripting/ScriptType.cs
index 848fda30e..6d9db8ad6 100644
--- a/Source/Editor/Scripting/ScriptType.cs
+++ b/Source/Editor/Scripting/ScriptType.cs
@@ -51,12 +51,12 @@ namespace FlaxEditor.Scripting
int standardToken = _managed?.MetadataToken ?? _custom?.MetadataToken ?? 0;
if (_managed is PropertyInfo && _managed.DeclaringType != null)
{
- var field = _managed.DeclaringType.GetField(string.Format("<{0}>k__BackingField", Name), BindingFlags.Instance | BindingFlags.NonPublic);
- if (field == null || field.MetadataToken == 0)
+ var backingField = _managed.DeclaringType.GetField(string.Format("<{0}>k__BackingField", Name), BindingFlags.Instance | BindingFlags.NonPublic);
+ if (backingField == null || backingField.MetadataToken == 0)
{
return standardToken;
}
- return field.MetadataToken;
+ return backingField.MetadataToken;
}
return standardToken;
}
diff --git a/Source/Editor/Surface/Archetypes/Textures.cs b/Source/Editor/Surface/Archetypes/Textures.cs
index aca0a8f7a..728fb4595 100644
--- a/Source/Editor/Surface/Archetypes/Textures.cs
+++ b/Source/Editor/Surface/Archetypes/Textures.cs
@@ -438,6 +438,19 @@ namespace FlaxEditor.Surface.Archetypes
NodeElementArchetype.Factory.ComboBox(50, Surface.Constants.LayoutOffsetY * 4, 100, 0, typeof(CommonSamplerType))
}
},
+ new NodeArchetype
+ {
+ TypeID = 18,
+ Title = "Lightmap UV",
+ AlternativeTitles = new string[] { "Lightmap TexCoord" },
+ Description = "Lightmap UVs",
+ Flags = NodeFlags.MaterialGraph,
+ Size = new Float2(110, 30),
+ Elements = new []
+ {
+ NodeElementArchetype.Factory.Output(0, "UVs", typeof(Float2), 0)
+ }
+ }
};
}
}
diff --git a/Source/Editor/Surface/VisjectSurface.CopyPaste.cs b/Source/Editor/Surface/VisjectSurface.CopyPaste.cs
index 158492460..94d41a624 100644
--- a/Source/Editor/Surface/VisjectSurface.CopyPaste.cs
+++ b/Source/Editor/Surface/VisjectSurface.CopyPaste.cs
@@ -190,15 +190,7 @@ namespace FlaxEditor.Surface
if (data == null || data.Length < 2)
return false;
- try
- {
- var model = JsonConvert.DeserializeObject(data);
- return model?.Nodes != null && model.Nodes.Length != 0;
- }
- catch (Exception)
- {
- return false;
- }
+ return true;
}
///
@@ -215,7 +207,15 @@ namespace FlaxEditor.Surface
try
{
// Load Mr Json
- var model = FlaxEngine.Json.JsonSerializer.Deserialize(data);
+                DataModel model;
+                try
+                {
+                    model = FlaxEngine.Json.JsonSerializer.Deserialize(data);
+                }
+                catch { return; } // Clipboard text is not surface data
+                // Valid JSON such as "null" deserializes to no model at all; nothing to paste then
+                if (model == null)
+                    return;
if (model.Nodes == null)
model.Nodes = new DataModelNode[0];
diff --git a/Source/Editor/Viewport/EditorGizmoViewport.cs b/Source/Editor/Viewport/EditorGizmoViewport.cs
index f0681e48d..0a0591015 100644
--- a/Source/Editor/Viewport/EditorGizmoViewport.cs
+++ b/Source/Editor/Viewport/EditorGizmoViewport.cs
@@ -360,11 +360,36 @@ namespace FlaxEditor.Viewport
};
// Setup input actions
- viewport.InputActions.Add(options => options.TranslateMode, () => transformGizmo.ActiveMode = TransformGizmoBase.Mode.Translate);
- viewport.InputActions.Add(options => options.RotateMode, () => transformGizmo.ActiveMode = TransformGizmoBase.Mode.Rotate);
- viewport.InputActions.Add(options => options.ScaleMode, () => transformGizmo.ActiveMode = TransformGizmoBase.Mode.Scale);
+ viewport.InputActions.Add(options => options.TranslateMode, () =>
+ {
+ viewport.GetInput(out var input);
+ if (input.IsMouseRightDown)
+ return;
+
+ transformGizmo.ActiveMode = TransformGizmoBase.Mode.Translate;
+ });
+ viewport.InputActions.Add(options => options.RotateMode, () =>
+ {
+ viewport.GetInput(out var input);
+ if (input.IsMouseRightDown)
+ return;
+
+ transformGizmo.ActiveMode = TransformGizmoBase.Mode.Rotate;
+ });
+ viewport.InputActions.Add(options => options.ScaleMode, () =>
+ {
+ viewport.GetInput(out var input);
+ if (input.IsMouseRightDown)
+ return;
+
+ transformGizmo.ActiveMode = TransformGizmoBase.Mode.Scale;
+ });
viewport.InputActions.Add(options => options.ToggleTransformSpace, () =>
{
+ viewport.GetInput(out var input);
+ if (input.IsMouseRightDown)
+ return;
+
transformGizmo.ToggleTransformSpace();
if (useProjectCache)
editor.ProjectCache.SetCustomData("TransformSpaceState", transformGizmo.ActiveTransformSpace.ToString());
diff --git a/Source/Editor/Windows/ContentWindow.ContextMenu.cs b/Source/Editor/Windows/ContentWindow.ContextMenu.cs
index f29f97e66..182467566 100644
--- a/Source/Editor/Windows/ContentWindow.ContextMenu.cs
+++ b/Source/Editor/Windows/ContentWindow.ContextMenu.cs
@@ -309,6 +309,23 @@ namespace FlaxEditor.Windows
{
if (selection[i] is BinaryAssetItem binaryAssetItem)
Editor.ContentImporting.Reimport(binaryAssetItem);
+                    else if (selection[i] is PrefabItem prefabItem)
+                    {
+                        var prefab = FlaxEngine.Content.Load(prefabItem.ID);
+                        var modelPrefab = prefab?.GetDefaultInstance()?.GetScript(); // Null when the prefab failed to load or has no default instance
+                        if (!modelPrefab)
+                            continue;
+                        var importPath = modelPrefab.ImportPath;
+                        var editor = Editor.Instance;
+                        if (editor.ContentImporting.GetReimportPath("Model Prefab", ref importPath))
+                            continue;
+                        var folder = editor.ContentDatabase.Find(Path.GetDirectoryName(prefab.Path)) as ContentFolder;
+                        if (folder == null)
+                            continue;
+                        var importOptions = modelPrefab.ImportOptions;
+                        importOptions.Type = FlaxEngine.Tools.ModelTool.ModelType.Prefab;
+                        editor.ContentImporting.Import(importPath, folder, true, importOptions);
+                    }
}
}
diff --git a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs
index f4a49195f..8d31b2744 100644
--- a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs
+++ b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs
@@ -54,8 +54,6 @@ namespace FlaxEditor.Windows
// Basic editing options
b = contextMenu.AddButton("Rename", inputOptions.Rename, Rename);
- b.Enabled = isSingleActorSelected;
-
b = contextMenu.AddButton("Duplicate", inputOptions.Duplicate, Editor.SceneEditing.Duplicate);
b.Enabled = hasSthSelected;
diff --git a/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs
new file mode 100644
index 000000000..6278a5f3b
--- /dev/null
+++ b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs
@@ -0,0 +1,246 @@
+using System.Text;
+using FlaxEngine;
+using FlaxEngine.GUI;
+using FlaxEditor.GUI;
+
+namespace FlaxEditor.Windows
+{
+ ///
+ /// A window used to rename multiple actors.
+ ///
+ public class RenameWindow : EditorWindow
+ {
+        /// <summary>Undo action that applies or restores the names of a batch of actors.</summary>
+        private class RenameUndoAction : IUndoAction
+        {
+            /// <summary>
+            /// The names the actors had when the action was created; applied by <see cref="Undo"/>.
+            /// </summary>
+            public string[] OldNames;
+
+            /// <summary>
+            /// The names applied by <see cref="Do"/>; an entry is null when its actor was skipped.
+            /// </summary>
+            public string[] NewNames;
+
+            /// <summary>
+            /// All actors to rename. Entries may be null or destroyed and are then skipped.
+            /// </summary>
+            public Actor[] ActorsToRename;
+
+            /// <summary>
+            /// Creates an undo action and captures the current names of the given actors.
+            /// </summary>
+            /// <param name="nodes">The actors to rename.</param>
+            public RenameUndoAction(Actor[] nodes)
+            {
+                ActorsToRename = nodes;
+                OldNames = new string[nodes.Length];
+                // Missing actors get a null name so both arrays stay index-aligned
+                for (int i = 0; i < nodes.Length; i++)
+                    OldNames[i] = nodes[i] ? nodes[i].Name : null;
+            }
+
+            /// <inheritdoc />
+            public void Do() => Apply(NewNames);
+
+            /// <inheritdoc />
+            public void Undo() => Apply(OldNames);
+
+            // Assigns names[i] to every live actor; missing actors and unset names are skipped
+            private void Apply(string[] names)
+            {
+                for (int i = 0; i < ActorsToRename.Length; i++)
+                    if (ActorsToRename[i] && names[i] != null)
+                        ActorsToRename[i].Name = names[i];
+            }
+
+            /// <inheritdoc />
+            public string ActionString => "Renaming actors.";
+
+            /// <inheritdoc />
+            public void Dispose() { }
+        }
+
+ ///
+ /// Rename options.
+ ///
+ private enum RenameOptions
+ {
+ OnlyName,
+ UsePrefix,
+ UseSuffix
+ }
+
+ private string _newActorsName;
+ private RenameOptions _renameOption;
+ private Actor[] _actorsToRename;
+
+ private static RenameWindow _currentOpenedWindow;
+
+ private RenameWindow(Actor[] actorsToRename, Editor editor) : base(editor, true, FlaxEngine.GUI.ScrollBars.None)
+ {
+ Title = "Rename";
+ Size = new Float2(300, 110);
+
+ _newActorsName = "Actor ";
+ _renameOption = RenameOptions.UseSuffix;
+ _actorsToRename = actorsToRename;
+
+ var container = new VerticalPanel
+ {
+ Parent = this,
+ AnchorPreset = AnchorPresets.StretchAll,
+ Offset = Vector2.Zero,
+ AutoSize = false,
+ Bounds = Rectangle.Empty
+ };
+
+ var nameContainer = new HorizontalPanel
+ {
+ Parent = container,
+ AnchorPreset = AnchorPresets.TopLeft,
+ Bounds = new Rectangle(0, 0, 300, 22),
+ Offset = Vector2.Zero,
+ AutoSize = false,
+ Spacing = 2,
+ CullChildren = false,
+ ClipChildren = false,
+ };
+
+ var optionsContainer = new HorizontalPanel
+ {
+ Parent = container,
+ AnchorPreset = AnchorPresets.TopLeft,
+ Bounds = new Rectangle(0, 22, 300, 22),
+ Offset = Vector2.Zero,
+ AutoSize = false,
+ Spacing = 2,
+ CullChildren = false,
+ ClipChildren = false,
+ };
+
+ var renameLabel = new Label
+ {
+ Text = "New Name",
+ AnchorPreset = AnchorPresets.Custom,
+ AnchorMin = Float2.Zero,
+ AnchorMax = new Float2(0.5f, 0),
+ Parent = nameContainer,
+ HorizontalAlignment = TextAlignment.Near,
+ Size = new Float2(150, 22),
+ Offsets = Margin.Zero,
+ };
+
+ var newNameTextBox = new TextBox
+ {
+ Text = _newActorsName,
+ AnchorPreset = AnchorPresets.Custom,
+ AnchorMin = new Float2(0.5f, 0),
+ AnchorMax = new Float2(1, 0),
+ Parent = nameContainer,
+ Size = new Float2(150, 22),
+ Offsets = Margin.Zero,
+ };
+
+ var optionNameLabel = new Label
+ {
+ Text = "Rename Option",
+ HorizontalAlignment = TextAlignment.Near,
+ AnchorPreset = AnchorPresets.Custom,
+ AnchorMin = Float2.Zero,
+ AnchorMax = new Float2(0.5f, 0),
+ Parent = optionsContainer,
+ Size = new Float2(150, 22),
+ Offsets = Margin.Zero,
+ };
+
+ var renameOptions = new EnumComboBox(typeof(RenameOptions))
+ {
+ Parent = optionsContainer,
+ Value = (int)_renameOption,
+ AnchorPreset = AnchorPresets.Custom,
+ AnchorMin = new Float2(0.5f, 0f),
+ AnchorMax = new Float2(1, 0),
+ Size = new Float2(150, 22),
+ Offsets = Margin.Zero,
+ };
+
+ var renameButton = new Button
+ {
+ Text = "Rename",
+ AnchorPreset = AnchorPresets.TopLeft,
+ Parent = container,
+ };
+
+ newNameTextBox.TextBoxEditEnd += textBox =>
+ {
+ _newActorsName = textBox.Text;
+ };
+
+ renameOptions.EnumValueChanged += combo =>
+ {
+ _renameOption = (RenameOptions)combo.Value;
+ };
+
+ newNameTextBox.Focus();
+ newNameTextBox.KeyDown += k =>
+ {
+ if (k == KeyboardKeys.Return)
+ {
+ _newActorsName = newNameTextBox.Text;
+ RenameActors();
+ }
+ };
+
+ renameButton.Clicked += RenameActors;
+ }
+
+        private void RenameActors()
+        {
+            // Register the undo entry first; it snapshots the current names on construction
+            var undoAction = new RenameUndoAction(_actorsToRename);
+            Editor.Instance.SceneEditing.Undo.AddAction(undoAction);
+            var appliedNames = new string[_actorsToRename.Length];
+            undoAction.NewNames = appliedNames;
+            for (int index = 0; index < _actorsToRename.Length; index++)
+            {
+                var target = _actorsToRename[index];
+                if (!target)
+                    continue;
+                // The slot index doubles as the number placed before or after the base name
+                var builder = new StringBuilder();
+                if (_renameOption == RenameOptions.UsePrefix)
+                    builder.Append(index);
+                builder.Append(_newActorsName);
+                if (_renameOption == RenameOptions.UseSuffix)
+                    builder.Append(index);
+                var finalName = builder.ToString();
+                target.Name = finalName;
+                appliedNames[index] = finalName;
+            }
+            // Flag the scenes as modified and dismiss the dialog
+            Editor.Instance.Scene.MarkAllScenesEdited();
+            Close();
+        }
+
+        /// <summary>
+        /// Creates an instance of the <see cref="RenameWindow"/> to rename actors and shows it.
+        /// </summary>
+        /// <param name="actorsToRename">All actors to rename.</param>
+        /// <param name="editor">The editor.</param>
+        public static void Show(Actor[] actorsToRename, Editor editor)
+        {
+            // Only one rename window can be open at a time
+            _currentOpenedWindow?.Close(ClosingReason.CloseEvent);
+            var window = new RenameWindow(actorsToRename, editor);
+            _currentOpenedWindow = window;
+            window.ShowFloating(new Float2(300, 110));
+            window.RootWindow.Window.Closed += () =>
+            {
+                // Ignore a late close notification from a window that has already been replaced
+                if (_currentOpenedWindow == window) _currentOpenedWindow = null;
+            };
+        }
+ }
+}
diff --git a/Source/Editor/Windows/SceneTreeWindow.cs b/Source/Editor/Windows/SceneTreeWindow.cs
index 8408f574e..42dc0ffbe 100644
--- a/Source/Editor/Windows/SceneTreeWindow.cs
+++ b/Source/Editor/Windows/SceneTreeWindow.cs
@@ -146,10 +146,24 @@ namespace FlaxEditor.Windows
private void Rename()
{
var selection = Editor.SceneEditing.Selection;
- if (selection.Count != 0 && selection[0] is ActorNode actor)
+ var selectionCount = selection.Count;
+
+ // Show a window with options to rename multiple actors.
+ if (selectionCount > 1)
{
- if (selection.Count != 0)
- Editor.SceneEditing.Select(actor);
+ var selectedActors = new Actor[selectionCount];
+
+ for (int i = 0; i < selectionCount; i++)
+ if (selection[i] is ActorNode actorNode)
+ selectedActors[i] = actorNode.Actor;
+
+ RenameWindow.Show(selectedActors, Editor);
+ return;
+ }
+
+ if (selectionCount != 0 && selection[0] is ActorNode actor)
+ {
+ Editor.SceneEditing.Select(actor);
actor.TreeNode.StartRenaming(this, _sceneTreePanel);
}
}
diff --git a/Source/Engine/AI/BehaviorKnowledge.h b/Source/Engine/AI/BehaviorKnowledge.h
index aef894c73..e89b3de4b 100644
--- a/Source/Engine/AI/BehaviorKnowledge.h
+++ b/Source/Engine/AI/BehaviorKnowledge.h
@@ -83,13 +83,13 @@ public:
/// Checks if knowledge has a given goal (exact type match without base class check).
///
/// The goal type.
- /// True if has a given goal, otherwise false.
+ /// True if knowledge has a given goal, otherwise false.
API_FUNCTION() bool HasGoal(ScriptingTypeHandle type) const;
///
/// Checks if knowledge has a given goal (exact type match without base class check).
///
- /// True if has a given goal, otherwise false.
+ /// True if knowledge has a given goal, otherwise false.
template
FORCE_INLINE bool HasGoal()
{
diff --git a/Source/Engine/AI/BehaviorTreeNodes.cpp b/Source/Engine/AI/BehaviorTreeNodes.cpp
index 02080da7d..e5b9d8d60 100644
--- a/Source/Engine/AI/BehaviorTreeNodes.cpp
+++ b/Source/Engine/AI/BehaviorTreeNodes.cpp
@@ -140,7 +140,7 @@ void BehaviorTreeNode::Deserialize(DeserializeStream& stream, ISerializeModifier
{
SerializableScriptingObject::Deserialize(stream, modifier);
- Name.Clear(); // Missing Name is assumes as unnamed node
+ Name.Clear(); // Missing Name is assumed as unnamed node
DESERIALIZE(Name);
}
@@ -197,7 +197,6 @@ BehaviorUpdateResult BehaviorTreeSequenceNode::Update(const BehaviorUpdateContex
return BehaviorUpdateResult::Failed;
auto result = Children[state->CurrentChildIndex]->InvokeUpdate(context);
-
switch (result)
{
case BehaviorUpdateResult::Success:
@@ -232,7 +231,6 @@ BehaviorUpdateResult BehaviorTreeSelectorNode::Update(const BehaviorUpdateContex
return BehaviorUpdateResult::Failed;
auto result = Children[state->CurrentChildIndex]->InvokeUpdate(context);
-
switch (result)
{
case BehaviorUpdateResult::Success:
diff --git a/Source/Engine/Content/Assets/VisualScript.cpp b/Source/Engine/Content/Assets/VisualScript.cpp
index 857e288fc..616fbe91a 100644
--- a/Source/Engine/Content/Assets/VisualScript.cpp
+++ b/Source/Engine/Content/Assets/VisualScript.cpp
@@ -1522,12 +1522,16 @@ void VisualScript::unload(bool isReloading)
if (_scriptingTypeHandle)
{
VisualScriptingBinaryModule::Locker.Lock();
- auto& type = VisualScriptingModule.Types[_scriptingTypeHandle.TypeIndex];
+ ScriptingType& type = VisualScriptingModule.Types[_scriptingTypeHandle.TypeIndex];
if (type.Script.DefaultInstance)
{
Delete(type.Script.DefaultInstance);
type.Script.DefaultInstance = nullptr;
}
+ char* typeName = (char*)Allocator::Allocate(sizeof(_typenameChars));
+ Platform::MemoryCopy(typeName, _typenameChars, sizeof(_typenameChars));
+ ((StringAnsiView&)type.Fullname) = StringAnsiView(typeName, 32);
+ VisualScriptingModule._unloadedScriptTypeNames.Add(typeName);
VisualScriptingModule.TypeNameToTypeIndex.RemoveValue(_scriptingTypeHandle.TypeIndex);
VisualScriptingModule.Scripts[_scriptingTypeHandle.TypeIndex] = nullptr;
_scriptingTypeHandleCached = _scriptingTypeHandle;
@@ -1653,6 +1657,8 @@ VisualScriptingBinaryModule::VisualScriptingBinaryModule()
ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const ScriptingObjectSpawnParams& params)
{
// Create native object (base type can be C++ or C#)
+ if (params.Type.Module == nullptr)
+ return nullptr;
ScriptingType& visualScriptType = (ScriptingType&)params.Type.GetType();
ScriptingTypeHandle baseTypeHandle = visualScriptType.GetBaseType();
const ScriptingType* baseTypePtr = &baseTypeHandle.GetType();
@@ -1663,9 +1669,7 @@ ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const Scri
}
ScriptingObject* object = baseTypePtr->Script.Spawn(params);
if (!object)
- {
return nullptr;
- }
// Beware! Hacking vtables incoming! Undefined behaviors exploits! Low-level programming!
visualScriptType.HackObjectVTable(object, baseTypeHandle, 1);
@@ -2060,6 +2064,11 @@ void VisualScriptingBinaryModule::Destroy(bool isReloading)
return;
BinaryModule::Destroy(isReloading);
+
+ // Free cached script typenames table
+ for (char* str : _unloadedScriptTypeNames)
+ Allocator::Free(str);
+ _unloadedScriptTypeNames.Clear();
}
ScriptingTypeHandle VisualScript::GetScriptingType()
diff --git a/Source/Engine/Content/Assets/VisualScript.h b/Source/Engine/Content/Assets/VisualScript.h
index b6bda5ffb..934e6917e 100644
--- a/Source/Engine/Content/Assets/VisualScript.h
+++ b/Source/Engine/Content/Assets/VisualScript.h
@@ -303,6 +303,7 @@ class FLAXENGINE_API VisualScriptingBinaryModule : public BinaryModule
friend VisualScript;
private:
StringAnsi _name;
+ Array _unloadedScriptTypeNames;
public:
///
diff --git a/Source/Engine/Content/JsonAssetReference.cs b/Source/Engine/Content/JsonAssetReference.cs
index a3a17fe2b..59cf55cc1 100644
--- a/Source/Engine/Content/JsonAssetReference.cs
+++ b/Source/Engine/Content/JsonAssetReference.cs
@@ -12,6 +12,7 @@ namespace FlaxEngine
#if FLAX_EDITOR
[CustomEditor(typeof(FlaxEditor.CustomEditors.Editors.AssetRefEditor))]
#endif
+ [Newtonsoft.Json.JsonConverter(typeof(Json.JsonAssetReferenceConverter))]
public struct JsonAssetReference : IComparable, IComparable>, IEquatable>
{
///
diff --git a/Source/Engine/Core/Collections/Array.h b/Source/Engine/Core/Collections/Array.h
index 40a458ece..7281fa8d8 100644
--- a/Source/Engine/Core/Collections/Array.h
+++ b/Source/Engine/Core/Collections/Array.h
@@ -579,7 +579,7 @@ public:
/// Insert the given item at specified index with keeping items order.
///
/// The zero-based index at which item should be inserted.
- /// The item to insert.
+ /// The item to be inserted by copying.
void Insert(int32 index, const T& item)
{
ASSERT(index >= 0 && index <= _count);
@@ -592,6 +592,23 @@ public:
data[index] = item;
}
+ ///
+ /// Insert the given item at specified index with keeping items order.
+ ///
+ /// The zero-based index at which item should be inserted.
+ /// The item to be inserted by moving.
+ void Insert(int32 index, T&& item)
+ {
+ ASSERT(index >= 0 && index <= _count);
+ EnsureCapacity(_count + 1);
+ T* data = _allocation.Get();
+ Memory::ConstructItems(data + _count, 1);
+ for (int32 i = _count - 1; i >= index; i--)
+ data[i + 1] = MoveTemp(data[i]);
+ _count++;
+ data[index] = MoveTemp(item);
+ }
+
///
/// Insert the given item at specified index with keeping items order.
///
@@ -772,9 +789,9 @@ public:
///
/// Performs pop from stack operation (stack grows at the end of the collection).
///
- T Pop()
+ FORCE_INLINE T Pop()
{
- T item(Last());
+ T item = MoveTemp(Last());
RemoveLast();
return item;
}
@@ -807,6 +824,15 @@ public:
Add(item);
}
+ ///
+ /// Performs enqueue to queue operation (queue head is in the beginning of queue).
+ ///
+ /// The item to append.
+ void Enqueue(T&& item)
+ {
+ Add(MoveTemp(item));
+ }
+
///
/// Performs dequeue from queue operation (queue head is in the beginning of queue).
///
@@ -814,7 +840,7 @@ public:
T Dequeue()
{
ASSERT(HasItems());
- T item(First());
+ T item = MoveTemp(_allocation.Get()[0]);
RemoveAtKeepOrder(0);
return item;
}
diff --git a/Source/Engine/Core/Math/FloatR10G10B10A2.cs b/Source/Engine/Core/Math/FloatR10G10B10A2.cs
index ab89b3962..8cd482127 100644
--- a/Source/Engine/Core/Math/FloatR10G10B10A2.cs
+++ b/Source/Engine/Core/Math/FloatR10G10B10A2.cs
@@ -12,7 +12,7 @@ namespace FlaxEngine
[StructLayout(LayoutKind.Sequential, Pack = 4)]
public struct FloatR10G10B10A2
{
- private uint value;
+ private uint rawValue;
///
/// Initializes a new instance of the structure.
@@ -23,7 +23,7 @@ namespace FlaxEngine
/// The floating point value that should be stored in A component (2 bit format).
public FloatR10G10B10A2(float x, float y, float z, float w)
{
- value = Pack(x, y, z, w);
+ rawValue = Pack(x, y, z, w);
}
///
@@ -33,7 +33,7 @@ namespace FlaxEngine
/// The floating point value that should be stored in alpha component (2 bit format).
public FloatR10G10B10A2(Float3 value, float w = 0)
{
- this.value = Pack(value.X, value.Y, value.Z, w);
+ rawValue = Pack(value.X, value.Y, value.Z, w);
}
///
@@ -42,37 +42,33 @@ namespace FlaxEngine
/// The floating point value that should be stored in 10 bit format.
public FloatR10G10B10A2(Float4 value)
{
- this.value = Pack(value.X, value.Y, value.Z, value.W);
+ rawValue = Pack(value.X, value.Y, value.Z, value.W);
}
///
- /// Gets or sets the raw 32 bit value used to back this vector.
+ /// Gets the raw 32 bit value used to back this vector.
///
- public uint RawValue
- {
- get => value;
- set => this.value = value;
- }
+ public uint RawValue => rawValue;
///
/// Gets the R component.
///
- public float R => (value & 0x3FF) / 1023.0f;
+ public float R => (rawValue & 0x3FF) / 1023.0f;
///
/// Gets the G component.
///
- public float G => ((value >> 10) & 0x3FF) / 1023.0f;
+ public float G => ((rawValue >> 10) & 0x3FF) / 1023.0f;
///
/// Gets the B component.
///
- public float B => ((value >> 20) & 0x3FF) / 1023.0f;
+ public float B => ((rawValue >> 20) & 0x3FF) / 1023.0f;
///
/// Gets the A component.
///
- public float A => (value >> 30) / 3.0f;
+ public float A => (rawValue >> 30) / 3.0f;
///
/// Performs an explicit conversion from to .
@@ -102,7 +98,7 @@ namespace FlaxEngine
/// true if has the same value as ; otherwise, false.
public static bool operator ==(FloatR10G10B10A2 left, FloatR10G10B10A2 right)
{
- return left.value == right.value;
+ return left.rawValue == right.rawValue;
}
///
@@ -113,7 +109,7 @@ namespace FlaxEngine
/// true if has a different value than ; otherwise, false.
public static bool operator !=(FloatR10G10B10A2 left, FloatR10G10B10A2 right)
{
- return left.value != right.value;
+ return left.rawValue != right.rawValue;
}
///
@@ -131,7 +127,7 @@ namespace FlaxEngine
/// A 32-bit signed integer hash code.
public override int GetHashCode()
{
- return value.GetHashCode();
+ return rawValue.GetHashCode();
}
///
@@ -142,7 +138,7 @@ namespace FlaxEngine
/// true if is the same instance as or if both are null references or if value1.Equals(value2) returns true; otherwise, false.
public static bool Equals(ref FloatR10G10B10A2 value1, ref FloatR10G10B10A2 value2)
{
- return value1.value == value2.value;
+ return value1.rawValue == value2.rawValue;
}
///
@@ -152,7 +148,7 @@ namespace FlaxEngine
/// true if the current instance is equal to the specified object; false otherwise.
public bool Equals(FloatR10G10B10A2 other)
{
- return other.value == value;
+ return other.rawValue == rawValue;
}
///
@@ -162,7 +158,7 @@ namespace FlaxEngine
/// true if the current instance is equal to the specified object; false otherwise.
public override bool Equals(object obj)
{
- return obj is FloatR10G10B10A2 other && value == other.value;
+ return obj is FloatR10G10B10A2 other && rawValue == other.rawValue;
}
private static uint Pack(float x, float y, float z, float w)
@@ -191,11 +187,11 @@ namespace FlaxEngine
{
Float3 vectorOut;
- uint tmp = value & 0x3FF;
+ uint tmp = rawValue & 0x3FF;
vectorOut.X = tmp / 1023.0f;
- tmp = (value >> 10) & 0x3FF;
+ tmp = (rawValue >> 10) & 0x3FF;
vectorOut.Y = tmp / 1023.0f;
- tmp = (value >> 20) & 0x3FF;
+ tmp = (rawValue >> 20) & 0x3FF;
vectorOut.Z = tmp / 1023.0f;
return vectorOut;
@@ -209,13 +205,13 @@ namespace FlaxEngine
{
Float4 vectorOut;
- uint tmp = value & 0x3FF;
+ uint tmp = rawValue & 0x3FF;
vectorOut.X = tmp / 1023.0f;
- tmp = (value >> 10) & 0x3FF;
+ tmp = (rawValue >> 10) & 0x3FF;
vectorOut.Y = tmp / 1023.0f;
- tmp = (value >> 20) & 0x3FF;
+ tmp = (rawValue >> 20) & 0x3FF;
vectorOut.Z = tmp / 1023.0f;
- vectorOut.W = (value >> 30) / 3.0f;
+ vectorOut.W = (rawValue >> 30) / 3.0f;
return vectorOut;
}
diff --git a/Source/Engine/Core/Math/FloatR11G11B10.cs b/Source/Engine/Core/Math/FloatR11G11B10.cs
index ccaf31024..524f16096 100644
--- a/Source/Engine/Core/Math/FloatR11G11B10.cs
+++ b/Source/Engine/Core/Math/FloatR11G11B10.cs
@@ -21,7 +21,7 @@ namespace FlaxEngine
{
// Reference: [https://github.com/Microsoft/DirectXMath/blob/master/Inc/DirectXPackedVector.h]
- private uint value;
+ private uint rawValue;
///
/// Initializes a new instance of the structure.
@@ -31,7 +31,7 @@ namespace FlaxEngine
/// The floating point value that should be stored in B component (10 bits format).
public FloatR11G11B10(float x, float y, float z)
{
- value = Pack(x, y, z);
+ rawValue = Pack(x, y, z);
}
///
@@ -40,17 +40,13 @@ namespace FlaxEngine
/// The floating point value that should be stored in compressed format.
public FloatR11G11B10(Float3 value)
{
- this.value = Pack(value.X, value.Y, value.Z);
+ rawValue = Pack(value.X, value.Y, value.Z);
}
///
- /// Gets or sets the raw 32 bit value used to back this vector.
+ /// Gets the raw 32 bit value used to back this vector.
///
- public uint RawValue
- {
- get => value;
- set => this.value = value;
- }
+ public uint RawValue => rawValue;
///
/// Performs an explicit conversion from to .
@@ -80,7 +76,7 @@ namespace FlaxEngine
/// true if has the same value as ; otherwise, false.
public static bool operator ==(FloatR11G11B10 left, FloatR11G11B10 right)
{
- return left.value == right.value;
+ return left.rawValue == right.rawValue;
}
///
@@ -91,7 +87,7 @@ namespace FlaxEngine
/// true if has a different value than ; otherwise, false.
public static bool operator !=(FloatR11G11B10 left, FloatR11G11B10 right)
{
- return left.value != right.value;
+ return left.rawValue != right.rawValue;
}
///
@@ -109,7 +105,7 @@ namespace FlaxEngine
/// A 32-bit signed integer hash code.
public override int GetHashCode()
{
- return value.GetHashCode();
+ return rawValue.GetHashCode();
}
///
@@ -120,7 +116,7 @@ namespace FlaxEngine
/// true if is the same instance as or if both are null references or if value1.Equals(value2) returns true; otherwise, false.
public static bool Equals(ref FloatR11G11B10 value1, ref FloatR11G11B10 value2)
{
- return value1.value == value2.value;
+ return value1.rawValue == value2.rawValue;
}
///
@@ -130,7 +126,7 @@ namespace FlaxEngine
/// true if the current instance is equal to the specified object; false otherwise.
public bool Equals(FloatR11G11B10 other)
{
- return other.value == value;
+ return other.rawValue == rawValue;
}
///
@@ -140,7 +136,7 @@ namespace FlaxEngine
/// true if the current instance is equal to the specified object; false otherwise.
public override bool Equals(object obj)
{
- return obj is FloatR11G11B10 other && value == other.value;
+ return obj is FloatR11G11B10 other && rawValue == other.rawValue;
}
private static unsafe uint Pack(float x, float y, float z)
@@ -288,7 +284,7 @@ namespace FlaxEngine
{
int zeroExponent = -112;
- Packed packed = new Packed(value);
+ Packed packed = new Packed(rawValue);
uint* result = stackalloc uint[4];
uint exponent;
diff --git a/Source/Engine/Core/Math/Half.cs b/Source/Engine/Core/Math/Half.cs
index da10d76e9..5177e7365 100644
--- a/Source/Engine/Core/Math/Half.cs
+++ b/Source/Engine/Core/Math/Half.cs
@@ -38,7 +38,7 @@ namespace FlaxEngine
[StructLayout(LayoutKind.Sequential, Pack = 2)]
public struct Half
{
- private ushort value;
+ private ushort rawValue;
///
/// Number of decimal digits of precision.
@@ -111,17 +111,13 @@ namespace FlaxEngine
/// The floating point value that should be stored in 16 bit format.
public Half(float value)
{
- this.value = HalfUtils.Pack(value);
+ rawValue = HalfUtils.Pack(value);
}
///
- /// Gets or sets the raw 16 bit value used to back this half-float.
+ /// Gets the raw 16 bit value used to back this half-float.
///
- public ushort RawValue
- {
- get => value;
- set => this.value = value;
- }
+ public ushort RawValue => rawValue;
///
/// Converts an array of half precision values into full precision values.
@@ -166,7 +162,7 @@ namespace FlaxEngine
/// The converted value.
public static implicit operator float(Half value)
{
- return HalfUtils.Unpack(value.value);
+ return HalfUtils.Unpack(value.rawValue);
}
///
@@ -177,7 +173,7 @@ namespace FlaxEngine
/// true if has the same value as ; otherwise, false.
public static bool operator ==(Half left, Half right)
{
- return left.value == right.value;
+ return left.rawValue == right.rawValue;
}
///
@@ -188,7 +184,7 @@ namespace FlaxEngine
/// true if has a different value than ; otherwise, false.
public static bool operator !=(Half left, Half right)
{
- return left.value != right.value;
+ return left.rawValue != right.rawValue;
}
///
@@ -207,7 +203,7 @@ namespace FlaxEngine
/// A 32-bit signed integer hash code.
public override int GetHashCode()
{
- ushort num = value;
+ ushort num = rawValue;
return (((num * 3) / 2) ^ num);
}
@@ -219,7 +215,7 @@ namespace FlaxEngine
/// true if is the same instance as or if both are null references or if value1.Equals(value2) returns true; otherwise, false.
public static bool Equals(ref Half value1, ref Half value2)
{
- return value1.value == value2.value;
+ return value1.rawValue == value2.rawValue;
}
///
@@ -229,7 +225,7 @@ namespace FlaxEngine
/// true if the current instance is equal to the specified object; false otherwise.
public bool Equals(Half other)
{
- return other.value == value;
+ return other.rawValue == rawValue;
}
///
@@ -248,7 +244,7 @@ namespace FlaxEngine
return false;
}
Half half = (Half)obj;
- return half.value == value;
+ return half.rawValue == rawValue;
}
static Half()
diff --git a/Source/Engine/Core/Math/Quaternion.cpp b/Source/Engine/Core/Math/Quaternion.cpp
index 5927e0665..5eae439dd 100644
--- a/Source/Engine/Core/Math/Quaternion.cpp
+++ b/Source/Engine/Core/Math/Quaternion.cpp
@@ -289,6 +289,7 @@ void Quaternion::Billboard(const Float3& objectPosition, const Float3& cameraPos
Quaternion Quaternion::FromDirection(const Float3& direction)
{
+ ASSERT(direction.IsNormalized());
Quaternion orientation;
if (Float3::Dot(direction, Float3::Up) >= 0.999f)
{
diff --git a/Source/Engine/Core/Math/Ray.h b/Source/Engine/Core/Math/Ray.h
index 59d080f56..2f8b4b907 100644
--- a/Source/Engine/Core/Math/Ray.h
+++ b/Source/Engine/Core/Math/Ray.h
@@ -46,6 +46,7 @@ public:
: Position(position)
, Direction(direction)
{
+ ASSERT(Direction.IsNormalized());
}
public:
diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp
index 5723fc265..ecce8a9da 100644
--- a/Source/Engine/Debug/DebugDraw.cpp
+++ b/Source/Engine/Debug/DebugDraw.cpp
@@ -946,7 +946,8 @@ void DebugDraw::DrawActors(Actor** selectedActors, int32 selectedActorsCount, bo
void DebugDraw::DrawAxisFromDirection(const Vector3& origin, const Vector3& direction, float size, float duration, bool depthTest)
{
- const auto rot = Quaternion::FromDirection(direction.GetNormalized());
+ ASSERT(direction.IsNormalized());
+ const auto rot = Quaternion::FromDirection(direction);
const Vector3 up = (rot * Vector3::Up);
const Vector3 forward = (rot * Vector3::Forward);
const Vector3 right = (rot * Vector3::Right);
@@ -971,16 +972,17 @@ void DebugDraw::DrawRay(const Vector3& origin, const Vector3& direction, const C
void DebugDraw::DrawRay(const Vector3& origin, const Vector3& direction, const Color& color, float length, float duration, bool depthTest)
{
+ ASSERT(direction.IsNormalized());
if (isnan(length) || isinf(length))
return;
- DrawLine(origin, origin + (direction.GetNormalized() * length), color, duration, depthTest);
+ DrawLine(origin, origin + (direction * length), color, duration, depthTest);
}
void DebugDraw::DrawRay(const Ray& ray, const Color& color, float length, float duration, bool depthTest)
{
if (isnan(length) || isinf(length))
return;
- DrawLine(ray.Position, ray.Position + (ray.Direction.GetNormalized() * length), color, duration, depthTest);
+ DrawLine(ray.Position, ray.Position + (ray.Direction * length), color, duration, depthTest);
}
void DebugDraw::DrawLine(const Vector3& start, const Vector3& end, const Color& color, float duration, bool depthTest)
diff --git a/Source/Engine/Engine/NativeInterop.Unmanaged.cs b/Source/Engine/Engine/NativeInterop.Unmanaged.cs
index 52218c2f4..62fd95117 100644
--- a/Source/Engine/Engine/NativeInterop.Unmanaged.cs
+++ b/Source/Engine/Engine/NativeInterop.Unmanaged.cs
@@ -850,11 +850,7 @@ namespace FlaxEngine.Interop
{
object fieldOwner = fieldOwnerHandle.Target;
FieldHolder field = Unsafe.As(fieldHandle.Target);
- object value = null;
- if (field.field.FieldType.IsValueType)
- value = Marshal.PtrToStructure(valuePtr, field.field.FieldType);
- else if (valuePtr != IntPtr.Zero)
- value = ManagedHandle.FromIntPtr(valuePtr).Target;
+ object value = MarshalToManaged(valuePtr, field.field.FieldType);
field.field.SetValue(fieldOwner, value);
}
diff --git a/Source/Engine/Graphics/Models/ModelData.Tool.cpp b/Source/Engine/Graphics/Models/ModelData.Tool.cpp
index be6b64398..a62fa6bae 100644
--- a/Source/Engine/Graphics/Models/ModelData.Tool.cpp
+++ b/Source/Engine/Graphics/Models/ModelData.Tool.cpp
@@ -15,12 +15,12 @@
#define USE_MIKKTSPACE 1
#include "ThirdParty/MikkTSpace/mikktspace.h"
#if USE_ASSIMP
-#define USE_SPARIAL_SORT 1
+#define USE_SPATIAL_SORT 1
#define ASSIMP_BUILD_NO_EXPORT
#include "Engine/Tools/ModelTool/SpatialSort.h"
//#include
#else
-#define USE_SPARIAL_SORT 0
+#define USE_SPATIAL_SORT 0
#endif
#include
@@ -155,18 +155,18 @@ bool MeshData::GenerateLightmapUVs()
}
int32 FindVertex(const MeshData& mesh, int32 vertexIndex, int32 startIndex, int32 searchRange, const Array& mapping
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
, const Assimp::SpatialSort& spatialSort
- , std::vector& sparialSortCache
+ , std::vector& spatialSortCache
#endif
)
{
const float uvEpsSqr = (1.0f / 250.0f) * (1.0f / 250.0f);
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
const Float3 vPosition = mesh.Positions[vertexIndex];
- spatialSort.FindPositions(*(aiVector3D*)&vPosition, 1e-4f, sparialSortCache);
- if (sparialSortCache.empty())
+ spatialSort.FindPositions(*(aiVector3D*)&vPosition, 1e-5f, spatialSortCache);
+ if (spatialSortCache.empty())
return INVALID_INDEX;
const Float2 vUV = mesh.UVs.HasItems() ? mesh.UVs[vertexIndex] : Float2::Zero;
@@ -177,9 +177,9 @@ int32 FindVertex(const MeshData& mesh, int32 vertexIndex, int32 startIndex, int3
const int32 end = startIndex + searchRange;
- for (size_t i = 0; i < sparialSortCache.size(); i++)
+ for (size_t i = 0; i < spatialSortCache.size(); i++)
{
- const int32 v = sparialSortCache[i];
+ const int32 v = spatialSortCache[i];
if (v < startIndex || v >= end)
continue;
#else
@@ -247,11 +247,11 @@ void MeshData::BuildIndexBuffer()
mapping.Resize(vertexCount);
int32 newVertexCounter = 0;
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
// Set up a SpatialSort to quickly find all vertices close to a given position
Assimp::SpatialSort vertexFinder;
vertexFinder.Fill((const aiVector3D*)Positions.Get(), vertexCount, sizeof(Float3));
- std::vector sparialSortCache;
+ std::vector spatialSortCache;
#endif
// Build index buffer
@@ -259,8 +259,8 @@ void MeshData::BuildIndexBuffer()
{
// Find duplicated vertex before the current one
const int32 reuseVertexIndex = FindVertex(*this, vertexIndex, 0, vertexIndex, mapping
-#if USE_SPARIAL_SORT
- , vertexFinder, sparialSortCache
+#if USE_SPATIAL_SORT
+ , vertexFinder, spatialSortCache
#endif
);
if (reuseVertexIndex == INVALID_INDEX)
@@ -304,18 +304,15 @@ void MeshData::BuildIndexBuffer()
dstBlendShape.Name = srcBlendShape.Name;
dstBlendShape.Weight = srcBlendShape.Weight;
- dstBlendShape.Vertices.Resize(newVertexCounter);
- for (int32 i = 0, j = 0; i < srcBlendShape.Vertices.Count(); i++)
+ dstBlendShape.Vertices.EnsureCapacity(srcBlendShape.Vertices.Count());
+ for (int32 i = 0; i < srcBlendShape.Vertices.Count(); i++)
{
- const auto idx = mapping[i];
- if (idx != INVALID_INDEX)
+ auto& v = srcBlendShape.Vertices[i];
+ int32 newVertexIndex = v.VertexIndex < (uint32)vertexCount ? mapping[v.VertexIndex] : INVALID_INDEX;
+ if (newVertexIndex != INVALID_INDEX)
{
- auto& v = srcBlendShape.Vertices[i];
- ASSERT_LOW_LAYER(v.VertexIndex < (uint32)vertexCount);
- ASSERT_LOW_LAYER(mapping[v.VertexIndex] != INVALID_INDEX);
- v.VertexIndex = mapping[v.VertexIndex];
- ASSERT_LOW_LAYER(v.VertexIndex < (uint32)newVertexCounter);
- dstBlendShape.Vertices[j++] = v;
+ v.VertexIndex = newVertexIndex;
+ dstBlendShape.Vertices.Add(v);
}
}
}
@@ -376,7 +373,7 @@ bool MeshData::GenerateNormals(float smoothingAngle)
Float3::Max(max, v3, max);
}
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
// Set up a SpatialSort to quickly find all vertices close to a given position
Assimp::SpatialSort vertexFinder;
vertexFinder.Fill((const aiVector3D*)Positions.Get(), vertexCount, sizeof(Float3));
@@ -399,7 +396,7 @@ bool MeshData::GenerateNormals(float smoothingAngle)
continue;
// Get all vertices that share this one
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
vertexFinder.FindPositions(*(aiVector3D*)&Positions[i], posEpsilon, verticesFound);
const int32 verticesFoundCount = (int32)verticesFound.size();
#else
@@ -429,7 +426,7 @@ bool MeshData::GenerateNormals(float smoothingAngle)
for (int32 i = 0; i < vertexCount; i++)
{
// Get all vertices that share this one
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
vertexFinder.FindPositions(*(aiVector3D*)&Positions[i], posEpsilon, verticesFound);
const int32 verticesFoundCount = (int32)verticesFound.size();
#else
@@ -623,7 +620,7 @@ bool MeshData::GenerateTangents(float smoothingAngle)
}
}
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
// Set up a SpatialSort to quickly find all vertices close to a given position
Assimp::SpatialSort vertexFinder;
vertexFinder.Fill((const aiVector3D*)Positions.Get(), vertexCount, sizeof(Float3));
@@ -648,7 +645,7 @@ bool MeshData::GenerateTangents(float smoothingAngle)
closeVertices.Clear();
// Find all vertices close to that position
-#if USE_SPARIAL_SORT
+#if USE_SPATIAL_SORT
vertexFinder.FindPositions(*(aiVector3D*)&origPos, posEpsilon, verticesFound);
const int32 verticesFoundCount = (int32)verticesFound.size();
#else
diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp
index 4462251c5..bbc90ab7f 100644
--- a/Source/Engine/Level/Actor.cpp
+++ b/Source/Engine/Level/Actor.cpp
@@ -543,6 +543,15 @@ void Actor::SetLayerRecursive(int32 layerIndex)
OnLayerChanged();
}
+void Actor::SetName(String&& value)
+{
+ if (_name == value)
+ return;
+ _name = MoveTemp(value);
+ if (GetScene())
+ Level::callActorEvent(Level::ActorEventType::OnActorNameChanged, this, nullptr);
+}
+
void Actor::SetName(const StringView& value)
{
if (_name == value)
diff --git a/Source/Engine/Level/Actor.h b/Source/Engine/Level/Actor.h
index 5a792858d..bd3bb1c59 100644
--- a/Source/Engine/Level/Actor.h
+++ b/Source/Engine/Level/Actor.h
@@ -185,7 +185,13 @@ public:
/// Sets the actor name.
///
/// The value to set.
- API_PROPERTY() void SetName(const StringView& value);
+ API_PROPERTY() void SetName(String&& value);
+
+ ///
+ /// Sets the actor name.
+ ///
+ /// The value to set.
+ void SetName(const StringView& value);
public:
///
diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp
index 95f3d7f0f..f24a853e3 100644
--- a/Source/Engine/Level/Level.cpp
+++ b/Source/Engine/Level/Level.cpp
@@ -930,6 +930,9 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou
// Fire event
CallSceneEvent(SceneEventType::OnSceneLoading, scene, sceneId);
+ // Get any injected children of the scene.
+ Array injectedSceneChildren = scene->Children;
+
// Loaded scene objects list
CollectionPoolCache::ScopeCache sceneObjects = ActorsCache::SceneObjectsListCache.Get();
const int32 dataCount = (int32)data.Size();
@@ -1031,6 +1034,20 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou
// /\ all above this has to be done on multiple threads at once
// \/ all below this has to be done on an any thread
+ // Add injected children of scene (via OnSceneLoading) into sceneObjects to be initialized
+ for (auto child : injectedSceneChildren)
+ {
+ Array injectedSceneObjects;
+ injectedSceneObjects.Add(child);
+ SceneQuery::GetAllSceneObjects(child, injectedSceneObjects);
+ for (auto o : injectedSceneObjects)
+ {
+ if (!o->IsRegistered())
+ o->RegisterObject();
+ sceneObjects->Add(o);
+ }
+ }
+
// Synchronize prefab instances (prefab may have objects removed or reordered so deserialized instances need to synchronize with it)
// TODO: resave and force sync scenes during game cooking so this step could be skipped in game
SceneObjectsFactory::SynchronizePrefabInstances(context, prefabSyncData);
@@ -1047,7 +1064,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou
PROFILE_CPU_NAMED("Initialize");
SceneObject** objects = sceneObjects->Get();
- for (int32 i = 0; i < dataCount; i++)
+ for (int32 i = 0; i < sceneObjects->Count(); i++)
{
SceneObject* obj = objects[i];
if (obj)
diff --git a/Source/Engine/Level/Scene/Scene.cpp b/Source/Engine/Level/Scene/Scene.cpp
index c7e1bc511..f562dfbe4 100644
--- a/Source/Engine/Level/Scene/Scene.cpp
+++ b/Source/Engine/Level/Scene/Scene.cpp
@@ -180,7 +180,7 @@ void Scene::CreateCsgCollider()
// Create collider
auto result = New();
result->SetStaticFlags(StaticFlags::FullyStatic);
- result->SetName(CSG_COLLIDER_NAME);
+ result->SetName(String(CSG_COLLIDER_NAME));
result->CollisionData = CSGData.CollisionData;
result->HideFlags |= HideFlags::DontSelect;
@@ -203,7 +203,7 @@ void Scene::CreateCsgModel()
// Create model
auto result = New();
result->SetStaticFlags(StaticFlags::FullyStatic);
- result->SetName(CSG_MODEL_NAME);
+ result->SetName(String(CSG_MODEL_NAME));
result->Model = CSGData.Model;
result->HideFlags |= HideFlags::DontSelect;
diff --git a/Source/Engine/Physics/Actors/Cloth.cpp b/Source/Engine/Physics/Actors/Cloth.cpp
index 0b5e75a7e..2781cb161 100644
--- a/Source/Engine/Physics/Actors/Cloth.cpp
+++ b/Source/Engine/Physics/Actors/Cloth.cpp
@@ -27,6 +27,11 @@ Cloth::Cloth(const SpawnParams& params)
_drawCategory = SceneRendering::SceneDrawAsync;
}
+void* Cloth::GetPhysicsCloth() const
+{
+ return _cloth;
+}
+
ModelInstanceActor::MeshReference Cloth::GetMesh() const
{
auto value = _mesh;
diff --git a/Source/Engine/Physics/Actors/Cloth.h b/Source/Engine/Physics/Actors/Cloth.h
index 92c1dea92..5bdcd6e5a 100644
--- a/Source/Engine/Physics/Actors/Cloth.h
+++ b/Source/Engine/Physics/Actors/Cloth.h
@@ -13,7 +13,6 @@
///
API_CLASS(Attributes="ActorContextMenu(\"New/Physics/Cloth\"), ActorToolbox(\"Physics\")") class FLAXENGINE_API Cloth : public Actor
{
- friend class PhysicsBackend;
DECLARE_SCENE_OBJECT(Cloth);
///
@@ -231,6 +230,11 @@ private:
Array _paint;
public:
+ ///
+ /// Gets the native physics backend object.
+ ///
+ void* GetPhysicsCloth() const;
+
///
/// Gets the mesh to use for the cloth simulation (single mesh from specific LOD). Always from the parent static or animated model actor.
///
diff --git a/Source/Engine/Physics/Actors/WheeledVehicle.h b/Source/Engine/Physics/Actors/WheeledVehicle.h
index 29e3dfbe6..16e226546 100644
--- a/Source/Engine/Physics/Actors/WheeledVehicle.h
+++ b/Source/Engine/Physics/Actors/WheeledVehicle.h
@@ -13,6 +13,7 @@
API_CLASS(Attributes="ActorContextMenu(\"New/Physics/Wheeled Vehicle\"), ActorToolbox(\"Physics\")") class FLAXENGINE_API WheeledVehicle : public RigidBody
{
friend class PhysicsBackend;
+ friend struct ScenePhysX;
DECLARE_SCENE_OBJECT(WheeledVehicle);
///
diff --git a/Source/Engine/Physics/Colliders/Collider.cpp b/Source/Engine/Physics/Colliders/Collider.cpp
index 793368abe..7e0a149aa 100644
--- a/Source/Engine/Physics/Colliders/Collider.cpp
+++ b/Source/Engine/Physics/Colliders/Collider.cpp
@@ -71,6 +71,7 @@ void Collider::SetContactOffset(float value)
bool Collider::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, float maxDistance) const
{
+ ASSERT(direction.IsNormalized());
resultHitDistance = MAX_float;
if (_shape == nullptr)
return false;
@@ -79,6 +80,7 @@ bool Collider::RayCast(const Vector3& origin, const Vector3& direction, float& r
bool Collider::RayCast(const Vector3& origin, const Vector3& direction, RayCastHit& hitInfo, float maxDistance) const
{
+ ASSERT(direction.IsNormalized());
if (_shape == nullptr)
return false;
return PhysicsBackend::RayCastShape(_shape, _transform.Translation, _transform.Orientation, origin, direction, hitInfo, maxDistance);
diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp
index 55b15ab5d..d88031a01 100644
--- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp
+++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp
@@ -102,9 +102,13 @@ struct ScenePhysX
Array ClothsList;
#endif
+#if WITH_VEHICLE
+ void UpdateVehicles(float dt);
+#endif
#if WITH_CLOTH
void PreSimulateCloth(int32 i);
void SimulateCloth(int32 i);
+ void UpdateCloths(float dt);
#endif
};
@@ -725,6 +729,451 @@ void InitVehicleSDK()
}
}
+void ScenePhysX::UpdateVehicles(float dt)
+{
+ if (WheelVehicles.IsEmpty())
+ return;
+ PROFILE_CPU_NAMED("Physics.Vehicles");
+
+ // Update vehicles steering
+ WheelVehiclesCache.Clear();
+ WheelVehiclesCache.EnsureCapacity(WheelVehicles.Count());
+ int32 wheelsCount = 0;
+ for (auto wheelVehicle : WheelVehicles)
+ {
+ if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation())
+ continue;
+ auto drive = (PxVehicleWheels*)wheelVehicle->_vehicle;
+ ASSERT(drive);
+ WheelVehiclesCache.Add(drive);
+ wheelsCount += drive->mWheelsSimData.getNbWheels();
+
+ const float deadZone = 0.1f;
+ bool isTank = wheelVehicle->_driveType == WheeledVehicle::DriveTypes::Tank;
+ float throttle = wheelVehicle->_throttle;
+ float steering = wheelVehicle->_steering;
+ float brake = wheelVehicle->_brake;
+ float leftThrottle = wheelVehicle->_tankLeftThrottle;
+ float rightThrottle = wheelVehicle->_tankRightThrottle;
+ float leftBrake = Math::Max(wheelVehicle->_tankLeftBrake, wheelVehicle->_handBrake);
+ float rightBrake = Math::Max(wheelVehicle->_tankRightBrake, wheelVehicle->_handBrake);
+ WheeledVehicle::DriveModes vehicleDriveMode = wheelVehicle->_driveControl.DriveMode;
+
+ if (isTank)
+ {
+ // Converting default vehicle controls to tank controls.
+ if (throttle != 0 || steering != 0)
+ {
+ leftThrottle = Math::Clamp(throttle + steering, -1.0f, 1.0f);
+ rightThrottle = Math::Clamp(throttle - steering, -1.0f, 1.0f);
+ }
+ }
+
+ // Convert the special tank drive mode inputs to standard tank mode inputs while the tank is turning.
+ if (isTank && vehicleDriveMode == WheeledVehicle::DriveModes::Standard)
+ {
+ // Special inputs when turning vehicle -1 1 to left or 1 -1 to turn right
+ // to:
+ // Standard inputs when turning vehicle 0 1 to left or 1 0 to turn right
+
+ if (leftThrottle < -deadZone && rightThrottle > deadZone)
+ {
+ leftThrottle = 0;
+ leftBrake = 1;
+ }
+ else if (leftThrottle > deadZone && rightThrottle < -deadZone)
+ {
+ rightThrottle = 0;
+ rightBrake = 1;
+ }
+ }
+
+ if (wheelVehicle->UseReverseAsBrake)
+ {
+ const float invalidDirectionThreshold = 80.0f;
+ const float breakThreshold = 8.0f;
+ const float forwardSpeed = wheelVehicle->GetForwardSpeed();
+ int currentGear = wheelVehicle->GetCurrentGear();
+ // Tank tracks direction: 1 forward -1 backward 0 neutral
+ bool toForward = false;
+ toForward |= throttle > deadZone;
+ toForward |= (leftThrottle > deadZone) && (rightThrottle > deadZone); // 1 1
+
+ bool toBackward = false;
+ toBackward |= throttle < -deadZone;
+ toBackward |= (leftThrottle < -deadZone) && (rightThrottle < -deadZone); // -1 -1
+ toBackward |= (leftThrottle < -deadZone) && (rightThrottle < deadZone); // -1 0
+ toBackward |= (leftThrottle < deadZone) && (rightThrottle < -deadZone); // 0 -1
+
+ bool isTankTurning = false;
+
+ if (isTank)
+ {
+ isTankTurning |= leftThrottle > deadZone && rightThrottle < -deadZone; // 1 -1
+ isTankTurning |= leftThrottle < -deadZone && rightThrottle > deadZone; // -1 1
+ isTankTurning |= leftThrottle < deadZone && rightThrottle > deadZone; // 0 1
+ isTankTurning |= leftThrottle > deadZone && rightThrottle < deadZone; // 1 0
+ isTankTurning |= leftThrottle < -deadZone && rightThrottle < deadZone; // -1 0
+ isTankTurning |= leftThrottle < deadZone && rightThrottle < -deadZone; // 0 -1
+
+ if (toForward || toBackward)
+ {
+ isTankTurning = false;
+ }
+ }
+
+ // Automatic gear change when changing driving direction
+ if (Math::Abs(forwardSpeed) < invalidDirectionThreshold)
+ {
+ int targetGear = wheelVehicle->GetTargetGear();
+ if (toBackward && currentGear > 0 && targetGear >= 0)
+ {
+ currentGear = -1;
+ }
+ else if (!toBackward && currentGear <= 0 && targetGear <= 0)
+ {
+ currentGear = 1;
+ }
+ else if (isTankTurning && currentGear <= 0)
+ {
+ currentGear = 1;
+ }
+
+ if (wheelVehicle->GetCurrentGear() != currentGear)
+ {
+ wheelVehicle->SetCurrentGear(currentGear);
+ }
+ }
+
+ // Automatic brake when changing driving direction
+ if (toForward)
+ {
+ if (forwardSpeed < -invalidDirectionThreshold)
+ {
+ brake = 1.0f;
+ leftBrake = 1.0f;
+ rightBrake = 1.0f;
+ }
+ }
+ else if (toBackward)
+ {
+ if (forwardSpeed > invalidDirectionThreshold)
+ {
+ brake = 1.0f;
+ leftBrake = 1.0f;
+ rightBrake = 1.0f;
+ }
+ }
+ else
+ {
+ if (forwardSpeed < breakThreshold && forwardSpeed > -breakThreshold && !isTankTurning) // not accelerating, very slow speed -> stop
+ {
+ brake = 1.0f;
+ leftBrake = 1.0f;
+ rightBrake = 1.0f;
+ }
+ }
+
+ // Block throttle if user is changing driving direction
+ if ((toForward && currentGear < 0) || (toBackward && currentGear > 0))
+ {
+ throttle = 0.0f;
+ leftThrottle = 0;
+ rightThrottle = 0;
+ }
+
+ throttle = Math::Abs(throttle);
+
+ if (isTank)
+ {
+ // Negate the track throttles and swap the left/right throttle and brake values when in reverse gear, because tank inputs can be < 0
+ if (currentGear < 0)
+ {
+ float lt = -leftThrottle;
+ float rt = -rightThrottle;
+ float lb = leftBrake;
+ float rb = rightBrake;
+ leftThrottle = rt;
+ rightThrottle = lt;
+ leftBrake = rb;
+ rightBrake = lb;
+ }
+ }
+ }
+ else
+ {
+ throttle = Math::Max(throttle, 0.0f);
+ }
+
+ // Force-brake the opposite track so the vehicle turns faster
+ if (Math::Abs(leftThrottle) > deadZone && Math::Abs(rightThrottle) < deadZone)
+ {
+ rightBrake = 1.0f;
+ }
+ if (Math::Abs(rightThrottle) > deadZone && Math::Abs(leftThrottle) < deadZone)
+ {
+ leftBrake = 1.0f;
+ }
+
+ // Smooth input controls
+ // @formatter:off
+ PxVehiclePadSmoothingData padSmoothing =
+ {
+ {
+ wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL
+ wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE
+ wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE
+ wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT
+ wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT
+ },
+ {
+ wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL
+ wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE
+ wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE
+ wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT
+ wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT
+ }
+ };
+ PxVehicleKeySmoothingData keySmoothing =
+ {
+ {
+ wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL
+ wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE
+ wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE
+ wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT
+ wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT
+ },
+ {
+ wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL
+ wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE
+ wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE
+ wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT
+ wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT
+ }
+ };
+ // @formatter:on
+
+ // Reduce steer by speed to make vehicle easier to maneuver
+ constexpr int steerVsSpeedN = 8;
+ PxF32 steerVsForwardSpeedData[steerVsSpeedN];
+ const int lastSteerVsSpeedIndex = wheelVehicle->_driveControl.SteerVsSpeed.Count() - 1;
+ int steerVsSpeedIndex = 0;
+
+ // Steer vs speed data structure example:
+ // array:
+ // speed, steer
+ // 1000, 1.0,
+ // 2000, 0.7,
+ // 5000, 0.5,
+ // ..
+
+ // fill the steerVsForwardSpeedData with the speed and steer
+ for (int32 i = 0; i < 8; i += 2)
+ {
+ steerVsForwardSpeedData[i] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Speed;
+ steerVsForwardSpeedData[i + 1] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Steer;
+ steerVsSpeedIndex = Math::Min(steerVsSpeedIndex + 1, lastSteerVsSpeedIndex);
+ }
+ const PxFixedSizeLookupTable steerVsForwardSpeed(steerVsForwardSpeedData, 4);
+
+ if (wheelVehicle->UseAnalogSteering)
+ {
+ switch (wheelVehicle->_driveTypeCurrent)
+ {
+ case WheeledVehicle::DriveTypes::Drive4W:
+ {
+ PxVehicleDrive4WRawInputData rawInputData;
+ rawInputData.setAnalogAccel(throttle);
+ rawInputData.setAnalogBrake(brake);
+ rawInputData.setAnalogSteer(wheelVehicle->_steering);
+ rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake);
+ PxVehicleDrive4WSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDrive4W*)drive);
+ break;
+ }
+ case WheeledVehicle::DriveTypes::DriveNW:
+ {
+ PxVehicleDriveNWRawInputData rawInputData;
+ rawInputData.setAnalogAccel(throttle);
+ rawInputData.setAnalogBrake(brake);
+ rawInputData.setAnalogSteer(wheelVehicle->_steering);
+ rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake);
+ PxVehicleDriveNWSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDriveNW*)drive);
+ break;
+ }
+ case WheeledVehicle::DriveTypes::Tank:
+ {
+ PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL;
+ PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode);
+ rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle)));
+ rawInputData.setAnalogLeftBrake(leftBrake);
+ rawInputData.setAnalogRightBrake(rightBrake);
+ rawInputData.setAnalogLeftThrust(leftThrottle);
+ rawInputData.setAnalogRightThrust(rightThrottle);
+ PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, dt, *(PxVehicleDriveTank*)drive);
+ break;
+ }
+ }
+ }
+ else
+ {
+ switch (wheelVehicle->_driveTypeCurrent)
+ {
+ case WheeledVehicle::DriveTypes::Drive4W:
+ {
+ PxVehicleDrive4WRawInputData rawInputData;
+ rawInputData.setDigitalAccel(throttle > deadZone);
+ rawInputData.setDigitalBrake(brake > deadZone);
+ rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone);
+ rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone);
+ rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone);
+ PxVehicleDrive4WSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDrive4W*)drive);
+ break;
+ }
+ case WheeledVehicle::DriveTypes::DriveNW:
+ {
+ PxVehicleDriveNWRawInputData rawInputData;
+ rawInputData.setDigitalAccel(throttle > deadZone);
+ rawInputData.setDigitalBrake(brake > deadZone);
+ rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone);
+ rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone);
+ rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone);
+ PxVehicleDriveNWSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDriveNW*)drive);
+ break;
+ }
+ case WheeledVehicle::DriveTypes::Tank:
+ {
+ // Convert analog inputs to digital inputs
+ leftThrottle = Math::Round(leftThrottle);
+ rightThrottle = Math::Round(rightThrottle);
+ leftBrake = Math::Round(leftBrake);
+ rightBrake = Math::Round(rightBrake);
+
+ PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL;
+ PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode);
+ rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle)));
+ rawInputData.setAnalogLeftBrake(leftBrake);
+ rawInputData.setAnalogRightBrake(rightBrake);
+ rawInputData.setAnalogLeftThrust(leftThrottle);
+ rawInputData.setAnalogRightThrust(rightThrottle);
+
+ // Needs to pass analog values to vehicle to maintain current movement direction because digital inputs accept only true/false values to tracks thrust instead of -1 to 1
+ PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, dt, *(PxVehicleDriveTank*)drive);
+ break;
+ }
+ }
+ }
+ }
+
+ // Update batches queries cache
+ if (wheelsCount > WheelRaycastBatchQuerySize)
+ {
+ if (WheelRaycastBatchQuery)
+ WheelRaycastBatchQuery->release();
+ WheelRaycastBatchQuerySize = wheelsCount;
+ WheelRaycastBatchQuery = PxCreateBatchQueryExt(*Scene, &WheelRaycastFilter, wheelsCount, wheelsCount, 0, 0, 0, 0);
+ }
+
+ // Update lookup table that maps wheel type into the surface friction
+ if (!WheelTireFrictions || WheelTireFrictionsDirty)
+ {
+ WheelTireFrictionsDirty = false;
+ RELEASE_PHYSX(WheelTireFrictions);
+ Array> materials;
+ materials.Resize(Math::Min((int32)PhysX->getNbMaterials(), PxVehicleDrivableSurfaceToTireFrictionPairs::eMAX_NB_SURFACE_TYPES));
+ PxMaterial** materialsPtr = materials.Get();
+ PhysX->getMaterials(materialsPtr, materials.Count(), 0);
+ Array> tireTypes;
+ tireTypes.Resize(materials.Count());
+ PxVehicleDrivableSurfaceType* tireTypesPtr = tireTypes.Get();
+ for (int32 i = 0; i < tireTypes.Count(); i++)
+ tireTypesPtr[i].mType = i;
+ WheelTireFrictions = PxVehicleDrivableSurfaceToTireFrictionPairs::allocate(WheelTireTypes.Count(), materials.Count());
+ WheelTireFrictions->setup(WheelTireTypes.Count(), materials.Count(), (const PxMaterial**)materialsPtr, tireTypesPtr);
+ for (int32 material = 0; material < materials.Count(); material++)
+ {
+ float friction = materialsPtr[material]->getStaticFriction();
+ for (int32 tireType = 0; tireType < WheelTireTypes.Count(); tireType++)
+ {
+ float scale = WheelTireTypes[tireType];
+ WheelTireFrictions->setTypePairFriction(material, tireType, friction * scale);
+ }
+ }
+ }
+
+ // Setup cache for wheel states
+ WheelVehiclesResultsPerVehicle.Resize(WheelVehiclesCache.Count(), false);
+ WheelVehiclesResultsPerWheel.Resize(wheelsCount, false);
+ wheelsCount = 0;
+ for (int32 i = 0, ii = 0; i < WheelVehicles.Count(); i++)
+ {
+ auto wheelVehicle = WheelVehicles[i];
+ if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation())
+ continue;
+ auto drive = (PxVehicleWheels*)WheelVehicles[ii]->_vehicle;
+ auto& perVehicle = WheelVehiclesResultsPerVehicle[ii];
+ ii++;
+ perVehicle.nbWheelQueryResults = drive->mWheelsSimData.getNbWheels();
+ perVehicle.wheelQueryResults = WheelVehiclesResultsPerWheel.Get() + wheelsCount;
+ wheelsCount += perVehicle.nbWheelQueryResults;
+ }
+
+ // Update vehicles
+ if (WheelVehiclesCache.Count() != 0)
+ {
+ PxVehicleSuspensionRaycasts(WheelRaycastBatchQuery, WheelVehiclesCache.Count(), WheelVehiclesCache.Get());
+ PxVehicleUpdates(dt, Scene->getGravity(), *WheelTireFrictions, WheelVehiclesCache.Count(), WheelVehiclesCache.Get(), WheelVehiclesResultsPerVehicle.Get());
+ }
+
+ // Synchronize state
+ for (int32 i = 0, ii = 0; i < WheelVehicles.Count(); i++)
+ {
+ auto wheelVehicle = WheelVehicles[i];
+ if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation())
+ continue;
+ auto drive = WheelVehiclesCache[ii];
+ auto& perVehicle = WheelVehiclesResultsPerVehicle[ii];
+ ii++;
+#if PHYSX_VEHICLE_DEBUG_TELEMETRY
+ LOG(Info, "Vehicle[{}] Gear={}, RPM={}", ii, wheelVehicle->GetCurrentGear(), (int32)wheelVehicle->GetEngineRotationSpeed());
+#endif
+
+ // Update wheels
+ for (int32 j = 0; j < wheelVehicle->_wheelsData.Count(); j++)
+ {
+ auto& wheelData = wheelVehicle->_wheelsData[j];
+ auto& perWheel = perVehicle.wheelQueryResults[j];
+#if PHYSX_VEHICLE_DEBUG_TELEMETRY
+ LOG(Info, "Vehicle[{}] Wheel[{}] longitudinalSlip={}, lateralSlip={}, suspSpringForce={}", ii, j, Utilities::RoundTo2DecimalPlaces(perWheel.longitudinalSlip), Utilities::RoundTo2DecimalPlaces(perWheel.lateralSlip), (int32)perWheel.suspSpringForce);
+#endif
+
+ auto& state = wheelData.State;
+ state.IsInAir = perWheel.isInAir;
+ state.TireContactCollider = perWheel.tireContactShape ? static_cast(perWheel.tireContactShape->userData) : nullptr;
+ state.TireContactPoint = P2C(perWheel.tireContactPoint) + Origin;
+ state.TireContactNormal = P2C(perWheel.tireContactNormal);
+ state.TireFriction = perWheel.tireFriction;
+ state.SteerAngle = RadiansToDegrees * perWheel.steerAngle;
+ state.RotationAngle = -RadiansToDegrees * drive->mWheelsDynData.getWheelRotationAngle(j);
+ state.SuspensionOffset = perWheel.suspJounce;
+#if USE_EDITOR
+ state.SuspensionTraceStart = P2C(perWheel.suspLineStart) + Origin;
+ state.SuspensionTraceEnd = P2C(perWheel.suspLineStart + perWheel.suspLineDir * perWheel.suspLineLength) + Origin;
+#endif
+
+ if (!wheelData.Collider)
+ continue;
+ auto shape = (PxShape*)wheelData.Collider->GetPhysicsShape();
+
+ // Update wheel collider transformation
+ auto localPose = shape->getLocalPose();
+ Transform t = wheelData.Collider->GetLocalTransform();
+ t.Orientation = Quaternion::Euler(-state.RotationAngle, state.SteerAngle, 0) * wheelData.LocalOrientation;
+ t.Translation = P2C(localPose.p) / wheelVehicle->GetScale() - t.Orientation * wheelData.Collider->GetCenter();
+ wheelData.Collider->SetLocalTransform(t);
+ }
+ }
+}
+
#endif
#if WITH_CLOTH
@@ -928,6 +1377,58 @@ void ScenePhysX::SimulateCloth(int32 i)
ClothSolver->simulateChunk(i);
}
+void ScenePhysX::UpdateCloths(float dt) // Runs the Pre, Simulation and Post cloth phases for this scene; dt is handed to the cloth solver's beginSimulation
+{
+ nv::cloth::Solver* clothSolver = ClothSolver;
+ if (!clothSolver || ClothsList.IsEmpty()) // Nothing to do without a solver or without any cloth in ClothsList
+ return;
+ PROFILE_CPU_NAMED("Physics.Cloth");
+ // Pre phase: hand a job bound to this scene to the job system, with ClothsList.Count() as the job count
+ {
+ PROFILE_CPU_NAMED("Pre");
+ Function job; // NOTE(review): template arguments are not visible in this patch text - confirm which member function gets bound
+ job.Bind(this);
+ JobSystem::Execute(job, ClothsList.Count());
+ }
+ // Simulation phase: runs only when beginSimulation(dt) returns true; endSimulation is called after the chunk job has been executed
+ {
+ PROFILE_CPU_NAMED("Simulation");
+ if (clothSolver->beginSimulation(dt))
+ {
+ Function job; // NOTE(review): bound member is not visible here either - presumably the per-chunk simulate function, confirm
+ job.Bind(this);
+ JobSystem::Execute(job, clothSolver->getSimulationChunkCount()); // Job count is the solver's simulation chunk count
+ clothSolver->endSimulation();
+ }
+ }
+ // Post phase: under ClothLocker, update bounds of non-culled cloths and rebuild the ones reported by UpdateBounds
+ {
+ PROFILE_CPU_NAMED("Post");
+ ScopeLock lock(ClothLocker);
+ Array brokenCloths; // Actors whose UpdateBounds call returned true; rebuilt after the loop below has finished
+ for (auto clothPhysX : ClothsList)
+ {
+ const auto& clothSettings = Cloths[clothPhysX];
+ if (clothSettings.Culled) // Culled cloths get neither a bounds update nor an OnPostUpdate call
+ continue;
+ if (clothSettings.UpdateBounds(clothPhysX)) // A true result queues the owning actor for a rebuild
+ brokenCloths.Add(clothSettings.Actor);
+ clothSettings.Actor->OnPostUpdate(); // Called for every non-culled cloth, including those queued for rebuild
+ }
+ for (auto cloth : brokenCloths)
+ {
+ // Rebuild the cloth object while holding an extra fabric reference so that the fabric does not get recooked
+ auto fabric = &((nv::cloth::Cloth*)cloth->GetPhysicsCloth())->getFabric();
+ Fabrics[fabric].Refs++; // Extra reference in the Fabrics table for the duration of the rebuild
+ fabric->incRefCount(); // Matching extra reference on the fabric object itself
+ cloth->Rebuild();
+ fabric->decRefCount(); // Release the temporary reference taken above
+ if (--Fabrics[fabric].Refs == 0) // Drop the table entry once its reference counter reaches zero
+ Fabrics.Remove(fabric);
+ }
+ }
+}
+
#endif
void* PhysicalMaterial::GetPhysicsMaterial()
@@ -1105,6 +1606,24 @@ PxMaterial* PhysicsBackendPhysX::GetDefaultMaterial()
return DefaultMaterial;
}
+void PhysicsBackendPhysX::SimulationStepDone(PxScene* scene, float dt)
+{
+#if WITH_VEHICLE
+    // Locate the backend scene that wraps the PhysX scene passed in by the caller
+    ScenePhysX* owner = nullptr;
+    for (auto physicsScene : Physics::Scenes)
+    {
+        auto candidate = (ScenePhysX*)physicsScene->GetPhysicsScene();
+        if (candidate->Scene != scene)
+            continue;
+        owner = candidate;
+        break;
+    }
+    if (owner) // A PhysX scene without a matching backend scene is ignored
+        owner->UpdateVehicles(dt); // Vehicles are updated with the step duration received from the caller
+#endif
+}
+
bool PhysicsBackend::Init()
{
#define CHECK_INIT(value, msg) if (!value) { LOG(Error, msg); return true; }
@@ -1394,451 +1913,6 @@ void PhysicsBackend::EndSimulateScene(void* scene)
scenePhysX->Stepper.wait(scenePhysX->Scene);
}
-#if WITH_VEHICLE
- if (scenePhysX->WheelVehicles.HasItems())
- {
- PROFILE_CPU_NAMED("Physics.Vehicles");
-
- // Update vehicles steering
- WheelVehiclesCache.Clear();
- WheelVehiclesCache.EnsureCapacity(scenePhysX->WheelVehicles.Count());
- int32 wheelsCount = 0;
- for (auto wheelVehicle : scenePhysX->WheelVehicles)
- {
- if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation())
- continue;
- auto drive = (PxVehicleWheels*)wheelVehicle->_vehicle;
- ASSERT(drive);
- WheelVehiclesCache.Add(drive);
- wheelsCount += drive->mWheelsSimData.getNbWheels();
-
- const float deadZone = 0.1f;
- bool isTank = wheelVehicle->_driveType == WheeledVehicle::DriveTypes::Tank;
- float throttle = wheelVehicle->_throttle;
- float steering = wheelVehicle->_steering;
- float brake = wheelVehicle->_brake;
- float leftThrottle = wheelVehicle->_tankLeftThrottle;
- float rightThrottle = wheelVehicle->_tankRightThrottle;
- float leftBrake = Math::Max(wheelVehicle->_tankLeftBrake, wheelVehicle->_handBrake);
- float rightBrake = Math::Max(wheelVehicle->_tankRightBrake, wheelVehicle->_handBrake);
- WheeledVehicle::DriveModes vehicleDriveMode = wheelVehicle->_driveControl.DriveMode;
-
- if (isTank)
- {
- // Converting default vehicle controls to tank controls.
- if (throttle != 0 || steering != 0)
- {
- leftThrottle = Math::Clamp(throttle + steering, -1.0f, 1.0f);
- rightThrottle = Math::Clamp(throttle - steering, -1.0f, 1.0f);
- }
- }
-
- // Converting special tank drive mode to standard tank mode when is turning.
- if (isTank && vehicleDriveMode == WheeledVehicle::DriveModes::Standard)
- {
- // Special inputs when turning vehicle -1 1 to left or 1 -1 to turn right
- // to:
- // Standard inputs when turning vehicle 0 1 to left or 1 0 to turn right
-
- if (leftThrottle < -deadZone && rightThrottle > deadZone)
- {
- leftThrottle = 0;
- leftBrake = 1;
- }
- else if (leftThrottle > deadZone && rightThrottle < -deadZone)
- {
- rightThrottle = 0;
- rightBrake = 1;
- }
- }
-
- if (wheelVehicle->UseReverseAsBrake)
- {
- const float invalidDirectionThreshold = 80.0f;
- const float breakThreshold = 8.0f;
- const float forwardSpeed = wheelVehicle->GetForwardSpeed();
- int currentGear = wheelVehicle->GetCurrentGear();
- // Tank tracks direction: 1 forward -1 backward 0 neutral
- bool toForward = false;
- toForward |= throttle > deadZone;
- toForward |= (leftThrottle > deadZone) && (rightThrottle > deadZone); // 1 1
-
- bool toBackward = false;
- toBackward |= throttle < -deadZone;
- toBackward |= (leftThrottle < -deadZone) && (rightThrottle < -deadZone); // -1 -1
- toBackward |= (leftThrottle < -deadZone) && (rightThrottle < deadZone); // -1 0
- toBackward |= (leftThrottle < deadZone) && (rightThrottle < -deadZone); // 0 -1
-
- bool isTankTurning = false;
-
- if (isTank)
- {
- isTankTurning |= leftThrottle > deadZone && rightThrottle < -deadZone; // 1 -1
- isTankTurning |= leftThrottle < -deadZone && rightThrottle > deadZone; // -1 1
- isTankTurning |= leftThrottle < deadZone && rightThrottle > deadZone; // 0 1
- isTankTurning |= leftThrottle > deadZone && rightThrottle < deadZone; // 1 0
- isTankTurning |= leftThrottle < -deadZone && rightThrottle < deadZone; // -1 0
- isTankTurning |= leftThrottle < deadZone && rightThrottle < -deadZone; // 0 -1
-
- if (toForward || toBackward)
- {
- isTankTurning = false;
- }
- }
-
- // Automatic gear change when changing driving direction
- if (Math::Abs(forwardSpeed) < invalidDirectionThreshold)
- {
- int targetGear = wheelVehicle->GetTargetGear();
- if (toBackward && currentGear > 0 && targetGear >= 0)
- {
- currentGear = -1;
- }
- else if (!toBackward && currentGear <= 0 && targetGear <= 0)
- {
- currentGear = 1;
- }
- else if (isTankTurning && currentGear <= 0)
- {
- currentGear = 1;
- }
-
- if (wheelVehicle->GetCurrentGear() != currentGear)
- {
- wheelVehicle->SetCurrentGear(currentGear);
- }
- }
-
- // Automatic break when changing driving direction
- if (toForward)
- {
- if (forwardSpeed < -invalidDirectionThreshold)
- {
- brake = 1.0f;
- leftBrake = 1.0f;
- rightBrake = 1.0f;
- }
- }
- else if (toBackward)
- {
- if (forwardSpeed > invalidDirectionThreshold)
- {
- brake = 1.0f;
- leftBrake = 1.0f;
- rightBrake = 1.0f;
- }
- }
- else
- {
- if (forwardSpeed < breakThreshold && forwardSpeed > -breakThreshold && !isTankTurning) // not accelerating, very slow speed -> stop
- {
- brake = 1.0f;
- leftBrake = 1.0f;
- rightBrake = 1.0f;
- }
- }
-
- // Block throttle if user is changing driving direction
- if ((toForward && currentGear < 0) || (toBackward && currentGear > 0))
- {
- throttle = 0.0f;
- leftThrottle = 0;
- rightThrottle = 0;
- }
-
- throttle = Math::Abs(throttle);
-
- if (isTank)
- {
- // invert acceleration when moving to backward because tank inputs can be < 0
- if (currentGear < 0)
- {
- float lt = -leftThrottle;
- float rt = -rightThrottle;
- float lb = leftBrake;
- float rb = rightBrake;
- leftThrottle = rt;
- rightThrottle = lt;
- leftBrake = rb;
- rightBrake = lb;
- }
- }
- }
- else
- {
- throttle = Math::Max(throttle, 0.0f);
- }
-
- // Force brake the another side track to turn faster
- if (Math::Abs(leftThrottle) > deadZone && Math::Abs(rightThrottle) < deadZone)
- {
- rightBrake = 1.0f;
- }
- if (Math::Abs(rightThrottle) > deadZone && Math::Abs(leftThrottle) < deadZone)
- {
- leftBrake = 1.0f;
- }
-
- // Smooth input controls
- // @formatter:off
- PxVehiclePadSmoothingData padSmoothing =
- {
- {
- wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL
- wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE
- wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE
- wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT
- wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT
- },
- {
- wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL
- wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE
- wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE
- wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT
- wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT
- }
- };
- PxVehicleKeySmoothingData keySmoothing =
- {
- {
- wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL
- wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE
- wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE
- wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT
- wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT
- },
- {
- wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL
- wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE
- wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE
- wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT
- wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT
- }
- };
- // @formatter:on
-
- // Reduce steer by speed to make vehicle easier to maneuver
- constexpr int steerVsSpeedN = 8;
- PxF32 steerVsForwardSpeedData[steerVsSpeedN];
- const int lastSteerVsSpeedIndex = wheelVehicle->_driveControl.SteerVsSpeed.Count() - 1;
- int steerVsSpeedIndex = 0;
-
- // Steer vs speed data structure example:
- // array:
- // speed, steer
- // 1000, 1.0,
- // 2000, 0.7,
- // 5000, 0.5,
- // ..
-
- // fill the steerVsForwardSpeedData with the speed and steer
- for (int32 i = 0; i < 8; i += 2)
- {
- steerVsForwardSpeedData[i] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Speed;
- steerVsForwardSpeedData[i + 1] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Steer;
- steerVsSpeedIndex = Math::Min(steerVsSpeedIndex + 1, lastSteerVsSpeedIndex);
- }
- const PxFixedSizeLookupTable steerVsForwardSpeed(steerVsForwardSpeedData, 4);
-
- if (wheelVehicle->UseAnalogSteering)
- {
- switch (wheelVehicle->_driveTypeCurrent)
- {
- case WheeledVehicle::DriveTypes::Drive4W:
- {
- PxVehicleDrive4WRawInputData rawInputData;
- rawInputData.setAnalogAccel(throttle);
- rawInputData.setAnalogBrake(brake);
- rawInputData.setAnalogSteer(wheelVehicle->_steering);
- rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake);
- PxVehicleDrive4WSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDrive4W*)drive);
- break;
- }
- case WheeledVehicle::DriveTypes::DriveNW:
- {
- PxVehicleDriveNWRawInputData rawInputData;
- rawInputData.setAnalogAccel(throttle);
- rawInputData.setAnalogBrake(brake);
- rawInputData.setAnalogSteer(wheelVehicle->_steering);
- rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake);
- PxVehicleDriveNWSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDriveNW*)drive);
- break;
- }
- case WheeledVehicle::DriveTypes::Tank:
- {
- PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL;
- PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode);
- rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle)));
- rawInputData.setAnalogLeftBrake(leftBrake);
- rawInputData.setAnalogRightBrake(rightBrake);
- rawInputData.setAnalogLeftThrust(leftThrottle);
- rawInputData.setAnalogRightThrust(rightThrottle);
- PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, scenePhysX->LastDeltaTime, *(PxVehicleDriveTank*)drive);
- break;
- }
- }
- }
- else
- {
- switch (wheelVehicle->_driveTypeCurrent)
- {
- case WheeledVehicle::DriveTypes::Drive4W:
- {
- PxVehicleDrive4WRawInputData rawInputData;
- rawInputData.setDigitalAccel(throttle > deadZone);
- rawInputData.setDigitalBrake(brake > deadZone);
- rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone);
- rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone);
- rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone);
- PxVehicleDrive4WSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDrive4W*)drive);
- break;
- }
- case WheeledVehicle::DriveTypes::DriveNW:
- {
- PxVehicleDriveNWRawInputData rawInputData;
- rawInputData.setDigitalAccel(throttle > deadZone);
- rawInputData.setDigitalBrake(brake > deadZone);
- rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone);
- rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone);
- rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone);
- PxVehicleDriveNWSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDriveNW*)drive);
- break;
- }
- case WheeledVehicle::DriveTypes::Tank:
- {
- // Convert analog inputs to digital inputs
- leftThrottle = Math::Round(leftThrottle);
- rightThrottle = Math::Round(rightThrottle);
- leftBrake = Math::Round(leftBrake);
- rightBrake = Math::Round(rightBrake);
-
- PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL;
- PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode);
- rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle)));
- rawInputData.setAnalogLeftBrake(leftBrake);
- rawInputData.setAnalogRightBrake(rightBrake);
- rawInputData.setAnalogLeftThrust(leftThrottle);
- rawInputData.setAnalogRightThrust(rightThrottle);
-
- // Needs to pass analog values to vehicle to maintain current movement direction because digital inputs accept only true/false values to tracks thrust instead of -1 to 1
- PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, scenePhysX->LastDeltaTime, *(PxVehicleDriveTank*)drive);
- break;
- }
- }
- }
- }
-
- // Update batches queries cache
- if (wheelsCount > scenePhysX->WheelRaycastBatchQuerySize)
- {
- if (scenePhysX->WheelRaycastBatchQuery)
- scenePhysX->WheelRaycastBatchQuery->release();
- scenePhysX->WheelRaycastBatchQuerySize = wheelsCount;
- scenePhysX->WheelRaycastBatchQuery = PxCreateBatchQueryExt(*scenePhysX->Scene, &WheelRaycastFilter, wheelsCount, wheelsCount, 0, 0, 0, 0);
- }
-
- // Update lookup table that maps wheel type into the surface friction
- if (!WheelTireFrictions || WheelTireFrictionsDirty)
- {
- WheelTireFrictionsDirty = false;
- RELEASE_PHYSX(WheelTireFrictions);
- Array> materials;
- materials.Resize(Math::Min((int32)PhysX->getNbMaterials(), PxVehicleDrivableSurfaceToTireFrictionPairs::eMAX_NB_SURFACE_TYPES));
- PxMaterial** materialsPtr = materials.Get();
- PhysX->getMaterials(materialsPtr, materials.Count(), 0);
- Array> tireTypes;
- tireTypes.Resize(materials.Count());
- PxVehicleDrivableSurfaceType* tireTypesPtr = tireTypes.Get();
- for (int32 i = 0; i < tireTypes.Count(); i++)
- tireTypesPtr[i].mType = i;
- WheelTireFrictions = PxVehicleDrivableSurfaceToTireFrictionPairs::allocate(WheelTireTypes.Count(), materials.Count());
- WheelTireFrictions->setup(WheelTireTypes.Count(), materials.Count(), (const PxMaterial**)materialsPtr, tireTypesPtr);
- for (int32 material = 0; material < materials.Count(); material++)
- {
- float friction = materialsPtr[material]->getStaticFriction();
- for (int32 tireType = 0; tireType < WheelTireTypes.Count(); tireType++)
- {
- float scale = WheelTireTypes[tireType];
- WheelTireFrictions->setTypePairFriction(material, tireType, friction * scale);
- }
- }
- }
-
- // Setup cache for wheel states
- WheelVehiclesResultsPerVehicle.Resize(WheelVehiclesCache.Count(), false);
- WheelVehiclesResultsPerWheel.Resize(wheelsCount, false);
- wheelsCount = 0;
- for (int32 i = 0, ii = 0; i < scenePhysX->WheelVehicles.Count(); i++)
- {
- auto wheelVehicle = scenePhysX->WheelVehicles[i];
- if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation())
- continue;
- auto drive = (PxVehicleWheels*)scenePhysX->WheelVehicles[ii]->_vehicle;
- auto& perVehicle = WheelVehiclesResultsPerVehicle[ii];
- ii++;
- perVehicle.nbWheelQueryResults = drive->mWheelsSimData.getNbWheels();
- perVehicle.wheelQueryResults = WheelVehiclesResultsPerWheel.Get() + wheelsCount;
- wheelsCount += perVehicle.nbWheelQueryResults;
- }
-
- // Update vehicles
- if (WheelVehiclesCache.Count() != 0)
- {
- PxVehicleSuspensionRaycasts(scenePhysX->WheelRaycastBatchQuery, WheelVehiclesCache.Count(), WheelVehiclesCache.Get());
- PxVehicleUpdates(scenePhysX->LastDeltaTime, scenePhysX->Scene->getGravity(), *WheelTireFrictions, WheelVehiclesCache.Count(), WheelVehiclesCache.Get(), WheelVehiclesResultsPerVehicle.Get());
- }
-
- // Synchronize state
- for (int32 i = 0, ii = 0; i < scenePhysX->WheelVehicles.Count(); i++)
- {
- auto wheelVehicle = scenePhysX->WheelVehicles[i];
- if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation())
- continue;
- auto drive = WheelVehiclesCache[ii];
- auto& perVehicle = WheelVehiclesResultsPerVehicle[ii];
- ii++;
-#if PHYSX_VEHICLE_DEBUG_TELEMETRY
- LOG(Info, "Vehicle[{}] Gear={}, RPM={}", ii, wheelVehicle->GetCurrentGear(), (int32)wheelVehicle->GetEngineRotationSpeed());
-#endif
-
- // Update wheels
- for (int32 j = 0; j < wheelVehicle->_wheelsData.Count(); j++)
- {
- auto& wheelData = wheelVehicle->_wheelsData[j];
- auto& perWheel = perVehicle.wheelQueryResults[j];
-#if PHYSX_VEHICLE_DEBUG_TELEMETRY
- LOG(Info, "Vehicle[{}] Wheel[{}] longitudinalSlip={}, lateralSlip={}, suspSpringForce={}", ii, j, Utilities::RoundTo2DecimalPlaces(perWheel.longitudinalSlip), Utilities::RoundTo2DecimalPlaces(perWheel.lateralSlip), (int32)perWheel.suspSpringForce);
-#endif
-
- auto& state = wheelData.State;
- state.IsInAir = perWheel.isInAir;
- state.TireContactCollider = perWheel.tireContactShape ? static_cast(perWheel.tireContactShape->userData) : nullptr;
- state.TireContactPoint = P2C(perWheel.tireContactPoint) + scenePhysX->Origin;
- state.TireContactNormal = P2C(perWheel.tireContactNormal);
- state.TireFriction = perWheel.tireFriction;
- state.SteerAngle = RadiansToDegrees * perWheel.steerAngle;
- state.RotationAngle = -RadiansToDegrees * drive->mWheelsDynData.getWheelRotationAngle(j);
- state.SuspensionOffset = perWheel.suspJounce;
-#if USE_EDITOR
- state.SuspensionTraceStart = P2C(perWheel.suspLineStart) + scenePhysX->Origin;
- state.SuspensionTraceEnd = P2C(perWheel.suspLineStart + perWheel.suspLineDir * perWheel.suspLineLength) + scenePhysX->Origin;
-#endif
-
- if (!wheelData.Collider)
- continue;
- auto shape = (PxShape*)wheelData.Collider->GetPhysicsShape();
-
- // Update wheel collider transformation
- auto localPose = shape->getLocalPose();
- Transform t = wheelData.Collider->GetLocalTransform();
- t.Orientation = Quaternion::Euler(-state.RotationAngle, state.SteerAngle, 0) * wheelData.LocalOrientation;
- t.Translation = P2C(localPose.p) / wheelVehicle->GetScale() - t.Orientation * wheelData.Collider->GetCenter();
- wheelData.Collider->SetLocalTransform(t);
- }
- }
- }
-#endif
-
{
PROFILE_CPU_NAMED("Physics.FlushActiveTransforms");
@@ -1860,55 +1934,8 @@ void PhysicsBackend::EndSimulateScene(void* scene)
}
#if WITH_CLOTH
- nv::cloth::Solver* clothSolver = scenePhysX->ClothSolver;
- if (clothSolver && scenePhysX->ClothsList.Count() != 0)
- {
- PROFILE_CPU_NAMED("Physics.Cloth");
-
- {
- PROFILE_CPU_NAMED("Pre");
- Function job;
- job.Bind(scenePhysX);
- JobSystem::Execute(job, scenePhysX->ClothsList.Count());
- }
-
- {
- PROFILE_CPU_NAMED("Simulation");
- if (clothSolver->beginSimulation(scenePhysX->LastDeltaTime))
- {
- Function job;
- job.Bind(scenePhysX);
- JobSystem::Execute(job, clothSolver->getSimulationChunkCount());
- clothSolver->endSimulation();
- }
- }
-
- {
- PROFILE_CPU_NAMED("Post");
- ScopeLock lock(ClothLocker);
- Array brokenCloths;
- for (auto clothPhysX : scenePhysX->ClothsList)
- {
- const auto& clothSettings = Cloths[clothPhysX];
- if (clothSettings.Culled)
- continue;
- if (clothSettings.UpdateBounds(clothPhysX))
- brokenCloths.Add(clothSettings.Actor);
- clothSettings.Actor->OnPostUpdate();
- }
- for (auto cloth : brokenCloths)
- {
- // Rebuild cloth object but keep fabric ref to prevent fabric recook
- auto fabric = &((nv::cloth::Cloth*)cloth->_cloth)->getFabric();
- Fabrics[fabric].Refs++;
- fabric->incRefCount();
- cloth->Rebuild();
- fabric->decRefCount();
- if (--Fabrics[fabric].Refs == 0)
- Fabrics.Remove(fabric);
- }
- }
- }
+ scenePhysX->UpdateCloths(scenePhysX->LastDeltaTime);
+#endif
{
PROFILE_CPU_NAMED("Physics.SendEvents");
@@ -1916,7 +1943,9 @@ void PhysicsBackend::EndSimulateScene(void* scene)
scenePhysX->EventsCallback.SendCollisionEvents();
scenePhysX->EventsCallback.SendJointEvents();
}
-#endif
+
+ // Clear delta after simulation ended
+ scenePhysX->LastDeltaTime = 0.0f;
}
Vector3 PhysicsBackend::GetSceneGravity(void* scene)
diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h
index 7267058f9..f7f245abd 100644
--- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h
+++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h
@@ -18,6 +18,7 @@ public:
static PxCooking* GetCooking();
#endif
static PxMaterial* GetDefaultMaterial();
+ static void SimulationStepDone(PxScene* scene, float dt); // Notifies the backend that a simulation step of the given PhysX scene has finished; dt is the duration of that step
};
#endif
diff --git a/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp
index dc303c89f..155642ba6 100644
--- a/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp
+++ b/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp
@@ -3,7 +3,7 @@
#if COMPILE_WITH_PHYSX
#include "PhysicsStepperPhysX.h"
-#include "Engine/Physics/Physics.h"
+#include "PhysicsBackendPhysX.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include
#include
@@ -66,7 +66,7 @@ void MultiThreadStepper::substepDone(StepperTask* ownerTask)
}
// -> OnSubstep
- //Physics::OnSubstep();
+ PhysicsBackendPhysX::SimulationStepDone(mScene, mSubStepSize);
if (mCurrentSubStep >= mNbSubSteps)
{
diff --git a/Source/Engine/Physics/Physics.cpp b/Source/Engine/Physics/Physics.cpp
index a1a5e8db2..85298bcff 100644
--- a/Source/Engine/Physics/Physics.cpp
+++ b/Source/Engine/Physics/Physics.cpp
@@ -235,76 +235,91 @@ bool Physics::LineCastAll(const Vector3& start, const Vector3& end, ArrayRayCast(origin, direction, maxDistance, layerMask, hitTriggers);
}
bool Physics::RayCast(const Vector3& origin, const Vector3& direction, RayCastHit& hitInfo, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->RayCast(origin, direction, hitInfo, maxDistance, layerMask, hitTriggers);
}
bool Physics::RayCastAll(const Vector3& origin, const Vector3& direction, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->RayCastAll(origin, direction, results, maxDistance, layerMask, hitTriggers);
}
bool Physics::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->BoxCast(center, halfExtents, direction, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->BoxCast(center, halfExtents, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::BoxCastAll(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->BoxCastAll(center, halfExtents, direction, results, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::SphereCast(const Vector3& center, const float radius, const Vector3& direction, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->SphereCast(center, radius, direction, maxDistance, layerMask, hitTriggers);
}
bool Physics::SphereCast(const Vector3& center, const float radius, const Vector3& direction, RayCastHit& hitInfo, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->SphereCast(center, radius, direction, hitInfo, maxDistance, layerMask, hitTriggers);
}
bool Physics::SphereCastAll(const Vector3& center, const float radius, const Vector3& direction, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->SphereCastAll(center, radius, direction, results, maxDistance, layerMask, hitTriggers);
}
bool Physics::CapsuleCast(const Vector3& center, const float radius, const float height, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->CapsuleCast(center, radius, height, direction, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::CapsuleCast(const Vector3& center, const float radius, const float height, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->CapsuleCast(center, radius, height, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::CapsuleCastAll(const Vector3& center, const float radius, const float height, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->CapsuleCastAll(center, radius, height, direction, results, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->ConvexCast(center, convexMesh, scale, direction, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->ConvexCast(center, convexMesh, scale, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers);
}
bool Physics::ConvexCastAll(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return DefaultScene->ConvexCastAll(center, convexMesh, scale, direction, results, rotation, maxDistance, layerMask, hitTriggers);
}
@@ -505,76 +520,91 @@ bool PhysicsScene::LineCastAll(const Vector3& start, const Vector3& end, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::RayCastAll(_scene, origin, direction, results, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::BoxCast(_scene, center, halfExtents, direction, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::BoxCast(_scene, center, halfExtents, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::BoxCastAll(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::BoxCastAll(_scene, center, halfExtents, direction, results, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::SphereCast(const Vector3& center, const float radius, const Vector3& direction, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::SphereCast(_scene, center, radius, direction, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::SphereCast(const Vector3& center, const float radius, const Vector3& direction, RayCastHit& hitInfo, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::SphereCast(_scene, center, radius, direction, hitInfo, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::SphereCastAll(const Vector3& center, const float radius, const Vector3& direction, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::SphereCastAll(_scene, center, radius, direction, results, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::CapsuleCast(const Vector3& center, const float radius, const float height, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::CapsuleCast(_scene, center, radius, height, direction, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::CapsuleCast(const Vector3& center, const float radius, const float height, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::CapsuleCast(_scene, center, radius, height, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::CapsuleCastAll(const Vector3& center, const float radius, const float height, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::CapsuleCastAll(_scene, center, radius, height, direction, results, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::ConvexCast(_scene, center, convexMesh, scale, direction, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::ConvexCast(_scene, center, convexMesh, scale, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers);
}
bool PhysicsScene::ConvexCastAll(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers)
{
+ ASSERT(direction.IsNormalized());
return PhysicsBackend::ConvexCastAll(_scene, center, convexMesh, scale, direction, results, rotation, maxDistance, layerMask, hitTriggers);
}
diff --git a/Source/Engine/Physics/Physics.h b/Source/Engine/Physics/Physics.h
index 977db34c2..efd11b184 100644
--- a/Source/Engine/Physics/Physics.h
+++ b/Source/Engine/Physics/Physics.h
@@ -120,7 +120,7 @@ public:
/// Performs a line between two points in the scene, returns all hitpoints infos.
///
/// The origin of the ray.
- /// The normalized direction of the ray.
+ /// The end position of the line.
/// The result hits. Valid only when method returns true.
/// The layer mask used to filter the results.
/// If set to true triggers will be hit, otherwise will skip them.
diff --git a/Source/Engine/Scripting/BinaryModule.h b/Source/Engine/Scripting/BinaryModule.h
index fa7540cf2..6d9baf5ce 100644
--- a/Source/Engine/Scripting/BinaryModule.h
+++ b/Source/Engine/Scripting/BinaryModule.h
@@ -118,7 +118,7 @@ public:
///
/// Tries to find a method in a given scripting type by the method name and parameters count.
///
- /// If the the type contains more than one method of the given name and parameters count the returned value can be non-deterministic (one of the matching methods).
+ /// If the type contains more than one method of the given name and parameters count the returned value can be non-deterministic (one of the matching methods).
/// The type to find method inside it.
/// The method name.
/// The method parameters count.
@@ -182,7 +182,7 @@ public:
/// Gets the value of a given scripting field.
///
/// The field.
- /// The object instance to get it's member field. Unused for static fields.
+ /// The object instance to get its member field. Unused for static fields.
/// The output field value.
/// True if failed, otherwise false.
virtual bool GetFieldValue(void* field, const Variant& instance, Variant& result)
@@ -194,7 +194,7 @@ public:
/// Sets the value of a given scripting field.
///
/// The field.
- /// The object instance to set it's member field. Unused for static fields.
+ /// The object instance to set its member field. Unused for static fields.
/// The field value to assign.
/// True if failed, otherwise false.
virtual bool SetFieldValue(void* field, const Variant& instance, Variant& value)
@@ -242,7 +242,7 @@ public:
///
/// Unloads the module (native library and C# assembly and any other scripting data). Unregisters the module.
///
- /// If true module is during reloading and should force release the runtime data. Used for C# assembly to cleanup it's runtime data in Mono (or other scripting runtime).
+ /// If true module is during reloading and should force release the runtime data. Used for C# assembly to clean up its runtime data in Mono (or other scripting runtime).
virtual void Destroy(bool isReloading);
};
diff --git a/Source/Engine/Serialization/JsonConverters.cs b/Source/Engine/Serialization/JsonConverters.cs
index c7d937abc..5be6a6a53 100644
--- a/Source/Engine/Serialization/JsonConverters.cs
+++ b/Source/Engine/Serialization/JsonConverters.cs
@@ -429,6 +429,60 @@ namespace FlaxEngine.Json
}
}
+    /// <summary>
+    /// Json converter for types whose name starts with "JsonAssetReference". Writes the referenced asset as its string ID
+    /// and reads it back from either that string form or the legacy object form (object with an "Asset" property).
+    /// </summary>
+    internal sealed class JsonAssetReferenceConverter : JsonConverter
+    {
+        /// <inheritdoc />
+        public override unsafe void WriteJson(JsonWriter writer, object value, Newtonsoft.Json.JsonSerializer serializer)
+        {
+            // The reference wrapper exposes the asset via its public "Asset" field; a missing asset is written as an empty ID
+            var asset = (JsonAsset)value.GetType().GetField("Asset").GetValue(value);
+            var id = asset?.ID ?? Guid.Empty;
+            writer.WriteValue(JsonSerializer.GetStringID(&id));
+        }
+
+        /// <inheritdoc />
+        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, Newtonsoft.Json.JsonSerializer serializer)
+        {
+            var result = Activator.CreateInstance(objectType);
+            if (reader.TokenType == JsonToken.String)
+            {
+                AssignAsset(result, objectType, (string)reader.Value);
+            }
+            else if (reader.TokenType == JsonToken.StartObject)
+            {
+                // [Deprecated on 26.07.2024, expires on 26.07.2026]
+                while (reader.Read() && reader.TokenType != JsonToken.EndObject)
+                {
+                    if (reader.TokenType != JsonToken.PropertyName)
+                        continue;
+
+                    var propertyName = (string)reader.Value;
+                    reader.Read();
+                    if (propertyName == "Asset" && reader.TokenType == JsonToken.String)
+                    {
+                        AssignAsset(result, objectType, (string)reader.Value);
+                    }
+                    else
+                    {
+                        // Step over nested objects/arrays so that their EndObject token is not mistaken for the end of this value
+                        reader.Skip();
+                    }
+                }
+            }
+
+            return result;
+        }
+
+        /// <inheritdoc />
+        public override bool CanConvert(Type objectType)
+        {
+            // Type names are identifiers, so compare ordinally rather than with the current culture
+            return objectType.Name.StartsWith("JsonAssetReference", StringComparison.Ordinal);
+        }
+
+        /// <summary>
+        /// Parses the asset ID text, starts loading that asset and stores it in the "Asset" field of the target reference.
+        /// </summary>
+        private static void AssignAsset(object target, Type targetType, string idText)
+        {
+            JsonSerializer.ParseID(idText, out var id);
+            var asset = Content.LoadAsync(id);
+            targetType.GetField("Asset").SetValue(target, asset);
+        }
+    }
+
/*
///
/// Serialize Guid values using `N` format
diff --git a/Source/Engine/Terrain/TerrainPatch.cpp b/Source/Engine/Terrain/TerrainPatch.cpp
index 4272cabe8..b7947288b 100644
--- a/Source/Engine/Terrain/TerrainPatch.cpp
+++ b/Source/Engine/Terrain/TerrainPatch.cpp
@@ -1963,6 +1963,7 @@ bool TerrainPatch::UpdateCollision()
bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, float maxDistance) const
{
+ ASSERT(direction.IsNormalized());
if (_physicsShape == nullptr)
return false;
Vector3 shapePos;
@@ -1973,6 +1974,7 @@ bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, floa
bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, Vector3& resultHitNormal, float maxDistance) const
{
+ ASSERT(direction.IsNormalized());
if (_physicsShape == nullptr)
return false;
Vector3 shapePos;
@@ -1990,6 +1992,7 @@ bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, floa
bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, TerrainChunk*& resultChunk, float maxDistance) const
{
+ ASSERT(direction.IsNormalized());
if (_physicsShape == nullptr)
return false;
Vector3 shapePos;
@@ -2027,6 +2030,7 @@ bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, floa
bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, RayCastHit& hitInfo, float maxDistance) const
{
+ ASSERT(direction.IsNormalized());
if (_physicsShape == nullptr)
return false;
Vector3 shapePos;
diff --git a/Source/Engine/Tests/TestPrefabs.cpp b/Source/Engine/Tests/TestPrefabs.cpp
index 02af56a8e..8c6e29249 100644
--- a/Source/Engine/Tests/TestPrefabs.cpp
+++ b/Source/Engine/Tests/TestPrefabs.cpp
@@ -189,7 +189,7 @@ TEST_CASE("Prefabs")
Guid newChildId;
Guid::Parse(TEXT("123456a04cc60d56a2f024bfeef57723"), newChildId);
auto newChild = EmptyActor::Spawn(ScriptingObject::SpawnParams(newChildId, EmptyActor::TypeInitializer));
- newChild->SetName(TEXT("Prefab B.Child"));
+ newChild->SetName(String(TEXT("Prefab B.Child")));
newChild->SetParent(instanceB);
// Apply nested prefab changes
@@ -213,7 +213,7 @@ TEST_CASE("Prefabs")
// Add another child
Guid::Parse(TEXT("678906a04cc60d56a2f024bfeef57723"), newChildId);
newChild = EmptyActor::Spawn(ScriptingObject::SpawnParams(newChildId, EmptyActor::TypeInitializer));
- newChild->SetName(TEXT("Prefab B.Child 2"));
+ newChild->SetName(String(TEXT("Prefab B.Child 2")));
newChild->SetParent(instanceB);
// Apply nested prefab changes
diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp
index 08ef842a4..1cf679c5d 100644
--- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp
+++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp
@@ -735,6 +735,26 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value)
         _writer.Write(*triplanarTexture);
         value = result;
+        // Stop here: without this the case falls through into the Lightmap UV case added below and its result overwrites 'value'
+        break;
     }
+    // Get Lightmap UV
+    case 18:
+    {
+        // Local Float2 that receives the lightmap UVs (zero when the material is compiled without lightmap support)
+        auto output = writeLocal(Value::InitForZero(ValueType::Float2), node);
+        auto lightmapUV = String::Format(TEXT(
+            "{{\n"
+            "#if USE_LIGHTMAP\n"
+            "\t {0} = input.LightmapUV;\n"
+            "#else\n"
+            "\t {0} = float2(0,0);\n"
+            "#endif\n"
+            "}}\n"
+        ), output.Value);
+
+        _writer.Write(*lightmapUV);
+
+        value = output;
+        break;
+    }
     default:
         break;
     }
diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp
index cf124f977..dcb0b6c5f 100644
--- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp
+++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp
@@ -6,6 +6,7 @@
#include "Engine/Core/Log.h"
#include "Engine/Core/Math/Mathd.h"
#include "Engine/Core/Math/Matrix.h"
+#include "Engine/Core/Math/Plane.h"
#include "Engine/Core/Collections/Sorting.h"
#include "Engine/Platform/FileSystem.h"
#include "Engine/Tools/TextureTool/TextureTool.h"
@@ -13,6 +14,11 @@
#include "Engine/Platform/File.h"
#define OPEN_FBX_CONVERT_SPACE 1
+#if BUILD_DEBUG
+#define OPEN_FBX_GET_CACHE_LIST(arrayName, varName, size) data.arrayName.Resize(size, false); auto& varName = data.arrayName
+#else
+#define OPEN_FBX_GET_CACHE_LIST(arrayName, varName, size) data.arrayName.Resize(size, false); auto* varName = data.arrayName.Get()
+#endif
// Import OpenFBX library
// Source: https://github.com/nem0/OpenFBX
@@ -49,7 +55,7 @@ Quaternion ToQuaternion(const ofbx::Quat& v)
return Quaternion((float)v.x, (float)v.y, (float)v.z, (float)v.w);
}
-Matrix ToMatrix(const ofbx::Matrix& mat)
+Matrix ToMatrix(const ofbx::DMatrix& mat)
{
Matrix result;
for (int32 i = 0; i < 16; i++)
@@ -103,6 +109,13 @@ struct OpenFbxImporterData
Array Materials;
Array ImportedMaterials;
+ Array TriangulatedIndicesCache;
+ Array BlendIndicesCache;
+ Array BlendWeightsCache;
+ Array TriangulatePointsCache;
+ Array TriangulateIndicesCache;
+ Array TriangulateEarIndicesCache;
+
OpenFbxImporterData(const String& path, const ModelTool::Options& options, ofbx::IScene* scene)
: Scene(scene)
, ScenePtr(scene)
@@ -416,7 +429,7 @@ void ProcessNodes(OpenFbxImporterData& data, const ofbx::Object* aNode, int32 pa
Matrix GetOffsetMatrix(OpenFbxImporterData& data, const ofbx::Mesh* mesh, const ofbx::Object* node)
{
#if 1
- auto* skin = mesh ? mesh->getGeometry()->getSkin() : nullptr;
+ auto* skin = mesh ? mesh->getSkin() : nullptr;
if (skin)
{
for (int i = 0, c = skin->getClusterCount(); i < c; i++)
@@ -445,7 +458,7 @@ Matrix GetOffsetMatrix(OpenFbxImporterData& data, const ofbx::Mesh* mesh, const
bool IsMeshInvalid(const ofbx::Mesh* aMesh)
{
- return aMesh->getGeometry()->getVertexCount() == 0;
+ return aMesh->getGeometryData().getPositions().count == 0;
}
bool ImportBones(OpenFbxImporterData& data, String& errorMsg)
@@ -455,8 +468,7 @@ bool ImportBones(OpenFbxImporterData& data, String& errorMsg)
for (int i = 0; i < meshCount; i++)
{
const auto aMesh = data.Scene->getMesh(i);
- const auto aGeometry = aMesh->getGeometry();
- const ofbx::Skin* skin = aGeometry->getSkin();
+ const ofbx::Skin* skin = aMesh->getSkin();
if (skin == nullptr || IsMeshInvalid(aMesh))
continue;
@@ -524,56 +536,198 @@ bool ImportBones(OpenFbxImporterData& data, String& errorMsg)
return false;
}
-bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, MeshData& mesh, String& errorMsg, int32 triangleStart, int32 triangleEnd)
+// Triangulates a single polygon of a geometry partition.
+// Writes vertex indices (3 per triangle, offsets into the geometry attribute streams starting at polygon.from_vertex) into
+// triangulatedIndices and returns the amount of indices written: 0 for polygons with less than 3 vertices, otherwise
+// 3 * (vertex_count - 2). The output buffer must have room for that many indices.
+// Triangles and quads are emitted directly, convex polygons as a fan, concave polygons via ear clipping.
+int Triangulate(OpenFbxImporterData& data, const ofbx::GeometryData& geom, const ofbx::GeometryPartition::Polygon& polygon, int* triangulatedIndices)
+{
+    if (polygon.vertex_count < 3)
+        return 0;
+    else if (polygon.vertex_count == 3)
+    {
+        triangulatedIndices[0] = polygon.from_vertex;
+        triangulatedIndices[1] = polygon.from_vertex + 1;
+        triangulatedIndices[2] = polygon.from_vertex + 2;
+        return 3;
+    }
+    else if (polygon.vertex_count == 4)
+    {
+        triangulatedIndices[0] = polygon.from_vertex + 0;
+        triangulatedIndices[1] = polygon.from_vertex + 1;
+        triangulatedIndices[2] = polygon.from_vertex + 2;
+        triangulatedIndices[3] = polygon.from_vertex + 0;
+        triangulatedIndices[4] = polygon.from_vertex + 2;
+        triangulatedIndices[5] = polygon.from_vertex + 3;
+        return 6;
+    }
+
+    const ofbx::Vec3Attributes& positions = geom.getPositions();
+    const ofbx::Vec3Attributes& normals = geom.getNormals();
+    Float3 normal(0.0f, 0.0f, 0.0f);
+    if (normals.values)
+    {
+        normal = ToFloat3(normals.get(polygon.from_vertex));
+    }
+    else
+    {
+        // Geometry without normals: reading the stream would dereference null, so derive the polygon normal from its positions (Newell's method)
+        for (int i = 0; i < polygon.vertex_count; i++)
+        {
+            const Float3 current = ToFloat3(positions.get(polygon.from_vertex + i));
+            const Float3 next = ToFloat3(positions.get(polygon.from_vertex + (i + 1) % polygon.vertex_count));
+            normal.X += (current.Y - next.Y) * (current.Z + next.Z);
+            normal.Y += (current.Z - next.Z) * (current.X + next.X);
+            normal.Z += (current.X - next.X) * (current.Y + next.Y);
+        }
+        normal = normal.GetNormalized();
+    }
+
+    // Check if the polygon is convex
+    int lastSign = 0;
+    bool isConvex = true;
+    for (int i = 0; i < polygon.vertex_count; i++)
+    {
+        Float3 v1 = ToFloat3(positions.get(polygon.from_vertex + i));
+        Float3 v2 = ToFloat3(positions.get(polygon.from_vertex + (i + 1) % polygon.vertex_count));
+        Float3 v3 = ToFloat3(positions.get(polygon.from_vertex + (i + 2) % polygon.vertex_count));
+
+        // The winding order of all triangles must be same for polygon to be considered convex
+        int sign;
+        Float3 c = Float3::Cross(v1 - v2, v3 - v2);
+        if (c.LengthSquared() == 0.0f)
+            continue;
+        else if (Math::NotSameSign(c.X, normal.X) || Math::NotSameSign(c.Y, normal.Y) || Math::NotSameSign(c.Z, normal.Z))
+            sign = 1;
+        else
+            sign = -1;
+        if ((sign < 0 && lastSign > 0) || (sign > 0 && lastSign < 0))
+        {
+            isConvex = false;
+            break;
+        }
+        lastSign += sign;
+    }
+
+    // Fast-path for convex case
+    if (isConvex)
+    {
+        for (int i = 0; i < polygon.vertex_count - 2; i++)
+        {
+            triangulatedIndices[i * 3 + 0] = polygon.from_vertex;
+            triangulatedIndices[i * 3 + 1] = polygon.from_vertex + (i + 1) % polygon.vertex_count;
+            triangulatedIndices[i * 3 + 2] = polygon.from_vertex + (i + 2) % polygon.vertex_count;
+        }
+        return 3 * (polygon.vertex_count - 2);
+    }
+
+    // Setup arrays for temporary data (TODO: maybe double-linked list is more optimal?)
+    auto& points = data.TriangulatePointsCache;
+    auto& indices = data.TriangulateIndicesCache;
+    auto& earIndices = data.TriangulateEarIndicesCache;
+    points.Clear();
+    indices.Clear();
+    earIndices.Clear();
+    points.EnsureCapacity(polygon.vertex_count, false);
+    indices.EnsureCapacity(polygon.vertex_count, false);
+    earIndices.EnsureCapacity(3 * (polygon.vertex_count - 2), false);
+
+    // Project points to a plane, choose two arbitrary axes
+    const Float3 u = Float3::Cross(normal, Math::Abs(normal.X) > Math::Abs(normal.Y) ? Float3::Up : Float3::Right).GetNormalized();
+    const Float3 v = Float3::Cross(normal, u).GetNormalized();
+    for (int i = 0; i < polygon.vertex_count; i++)
+    {
+        const Float3 point = ToFloat3(positions.get(polygon.from_vertex + i));
+        const Float3 projectedPoint = Float3::ProjectOnPlane(point, normal);
+        const Float2 pointOnPlane = Float2(
+            projectedPoint.X * u.X + projectedPoint.Y * u.Y + projectedPoint.Z * u.Z,
+            projectedPoint.X * v.X + projectedPoint.Y * v.Y + projectedPoint.Z * v.Z);
+
+        points.Add(pointOnPlane);
+        indices.Add(i);
+    }
+
+    // Triangulate non-convex polygons using simple ear-clipping algorithm (https://nils-olovsson.se/articles/ear_clipping_triangulation/)
+    const int maxIterations = indices.Count() * 10; // Safeguard to prevent infinite loop
+    int index = 0;
+    while (indices.Count() > 3 && index < maxIterations)
+    {
+        const int i1 = index % indices.Count();
+        const int i2 = (index + 1) % indices.Count();
+        const int i3 = (index + 2) % indices.Count();
+        const Float2 p1 = points[indices[i1]];
+        const Float2 p2 = points[indices[i2]];
+        const Float2 p3 = points[indices[i3]];
+
+        // TODO: Skip triangles with very sharp angles?
+
+        // Skip reflex vertices
+        if (Float2::Cross(p2 - p1, p3 - p1) < 0.0f)
+        {
+            index++;
+            continue;
+        }
+
+        // The triangle is considered to be an "ear" when no other points reside inside the triangle
+        bool isEar = true;
+        for (int j = 0; j < indices.Count(); j++)
+        {
+            if (j == i1 || j == i2 || j == i3)
+                continue;
+            const Float2 candidate = points[indices[j]];
+            if (CollisionsHelper::IsPointInTriangle(candidate, p1, p2, p3))
+            {
+                isEar = false;
+                break;
+            }
+        }
+        if (!isEar)
+        {
+            index++;
+            continue;
+        }
+
+        // Add an ear and remove the tip point from evaluation (index stays so the shrunk polygon is re-tested at the same spot)
+        earIndices.Add(indices[i1]);
+        earIndices.Add(indices[i2]);
+        earIndices.Add(indices[i3]);
+        indices.RemoveAtKeepOrder(i2);
+    }
+
+    // Emit the clipped ears (stored values are polygon-local vertex numbers in range [0, vertex_count))
+    int written = 0;
+    for (int i = 0; i < earIndices.Count(); i++)
+        triangulatedIndices[written++] = polygon.from_vertex + earIndices[i];
+
+    // Close the remaining polygon with a triangle fan. Normally exactly one triangle is left; if clipping stopped on the iteration
+    // limit (degenerate or self-intersecting input) the fan still fills every reported index instead of leaving stale buffer contents
+    for (int i = 1; i + 1 < indices.Count(); i++)
+    {
+        triangulatedIndices[written++] = polygon.from_vertex + indices[0];
+        triangulatedIndices[written++] = polygon.from_vertex + indices[i];
+        triangulatedIndices[written++] = polygon.from_vertex + indices[i + 1];
+    }
+
+    // Always equals 3 * (polygon.vertex_count - 2): every removed vertex produced one ear and the fan covers the rest
+    return written;
+}
+
+bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, MeshData& mesh, String& errorMsg, int partitionIndex)
{
PROFILE_CPU();
mesh.Name = aMesh->name;
ZoneText(*mesh.Name, mesh.Name.Length());
- const int32 firstVertexOffset = triangleStart * 3;
- const int32 lastVertexOffset = triangleEnd * 3;
- const ofbx::Geometry* aGeometry = aMesh->getGeometry();
- const int vertexCount = lastVertexOffset - firstVertexOffset + 3;
- ASSERT(firstVertexOffset + vertexCount <= aGeometry->getVertexCount());
- const ofbx::Vec3* vertices = aGeometry->getVertices();
- const ofbx::Vec3* normals = aGeometry->getNormals();
- const ofbx::Vec3* tangents = aGeometry->getTangents();
- const ofbx::Vec4* colors = aGeometry->getColors();
- const ofbx::Vec2* uvs = aGeometry->getUVs();
- const ofbx::Skin* skin = aGeometry->getSkin();
- const ofbx::BlendShape* blendShape = aGeometry->getBlendShape();
+ const ofbx::GeometryData& geometryData = aMesh->getGeometryData();
+ const ofbx::GeometryPartition& partition = geometryData.getPartition(partitionIndex);
+ const int vertexCount = partition.triangles_count * 3;
+ const ofbx::Vec3Attributes& positions = geometryData.getPositions();
+ const ofbx::Vec2Attributes& uvs = geometryData.getUVs();
+ const ofbx::Vec3Attributes& normals = geometryData.getNormals();
+ const ofbx::Vec3Attributes& tangents = geometryData.getTangents();
+ const ofbx::Vec4Attributes& colors = geometryData.getColors();
+ const ofbx::Skin* skin = aMesh->getSkin();
+ const ofbx::BlendShape* blendShape = aMesh->getBlendShape();
+ OPEN_FBX_GET_CACHE_LIST(TriangulatedIndicesCache, triangulatedIndices, vertexCount);
// Properties
const ofbx::Material* aMaterial = nullptr;
if (aMesh->getMaterialCount() > 0)
- {
- if (aGeometry->getMaterials())
- aMaterial = aMesh->getMaterial(aGeometry->getMaterials()[triangleStart]);
- else
- aMaterial = aMesh->getMaterial(0);
- }
+ aMaterial = aMesh->getMaterial(partitionIndex);
mesh.MaterialSlotIndex = data.AddMaterial(result, aMaterial);
// Vertex positions
mesh.Positions.Resize(vertexCount, false);
- for (int i = 0; i < vertexCount; i++)
- mesh.Positions.Get()[i] = ToFloat3(vertices[i + firstVertexOffset]);
+ {
+ int numIndicesTotal = 0;
+ for (int i = 0; i < partition.polygon_count; i++)
+ {
+ int numIndices = Triangulate(data, geometryData, partition.polygons[i], &triangulatedIndices[numIndicesTotal]);
+ for (int j = numIndicesTotal; j < numIndicesTotal + numIndices; j++)
+ mesh.Positions.Get()[j] = ToFloat3(positions.get(triangulatedIndices[j]));
+ numIndicesTotal += numIndices;
+ }
+ }
// Indices (dummy index buffer)
- if (vertexCount % 3 != 0)
- {
- errorMsg = TEXT("Invalid vertex count. It must be multiple of 3.");
- return true;
- }
mesh.Indices.Resize(vertexCount, false);
for (int i = 0; i < vertexCount; i++)
mesh.Indices.Get()[i] = i;
// Texture coordinates
- if (uvs)
+ if (uvs.values)
{
mesh.UVs.Resize(vertexCount, false);
for (int i = 0; i < vertexCount; i++)
- mesh.UVs.Get()[i] = ToFloat2(uvs[i + firstVertexOffset]);
+ mesh.UVs.Get()[i] = ToFloat2(uvs.get(triangulatedIndices[i]));
if (data.ConvertRH)
{
for (int32 v = 0; v < vertexCount; v++)
@@ -582,7 +736,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
}
// Normals
- if (data.Options.CalculateNormals || !normals)
+ if (data.Options.CalculateNormals || !normals.values)
{
if (mesh.GenerateNormals(data.Options.SmoothingNormalsAngle))
{
@@ -590,11 +744,11 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
return true;
}
}
- else if (normals)
+ else if (normals.values)
{
mesh.Normals.Resize(vertexCount, false);
for (int i = 0; i < vertexCount; i++)
- mesh.Normals.Get()[i] = ToFloat3(normals[i + firstVertexOffset]);
+ mesh.Normals.Get()[i] = ToFloat3(normals.get(triangulatedIndices[i]));
if (data.ConvertRH)
{
// Mirror normals along the Z axis
@@ -604,15 +758,15 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
}
// Tangents
- if ((data.Options.CalculateTangents || !tangents) && mesh.UVs.HasItems())
+ if ((data.Options.CalculateTangents || !tangents.values) && mesh.UVs.HasItems())
{
// Generated after full mesh data conversion
}
- else if (tangents)
+ else if (tangents.values)
{
mesh.Tangents.Resize(vertexCount, false);
for (int i = 0; i < vertexCount; i++)
- mesh.Tangents.Get()[i] = ToFloat3(tangents[i + firstVertexOffset]);
+ mesh.Tangents.Get()[i] = ToFloat3(tangents.get(triangulatedIndices[i]));
if (data.ConvertRH)
{
// Mirror tangents along the Z axis
@@ -658,12 +812,12 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
}
// Check if has that channel texcoords
- const auto lightmapUVs = aGeometry->getUVs(inputChannelIndex);
- if (lightmapUVs)
+ const auto lightmapUVs = geometryData.getUVs(inputChannelIndex);
+ if (lightmapUVs.values)
{
mesh.LightmapUVs.Resize(vertexCount, false);
for (int i = 0; i < vertexCount; i++)
- mesh.LightmapUVs.Get()[i] = ToFloat2(lightmapUVs[i + firstVertexOffset]);
+ mesh.LightmapUVs.Get()[i] = ToFloat2(lightmapUVs.get(triangulatedIndices[i]));
if (data.ConvertRH)
{
for (int32 v = 0; v < vertexCount; v++)
@@ -677,20 +831,20 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
}
// Vertex Colors
- if (data.Options.ImportVertexColors && colors)
+ if (data.Options.ImportVertexColors && colors.values)
{
mesh.Colors.Resize(vertexCount, false);
for (int i = 0; i < vertexCount; i++)
- mesh.Colors.Get()[i] = ToColor(colors[i + firstVertexOffset]);
+ mesh.Colors.Get()[i] = ToColor(colors.get(triangulatedIndices[i]));
}
// Blend Indices and Blend Weights
if (skin && skin->getClusterCount() > 0 && EnumHasAnyFlags(data.Options.ImportTypes, ImportDataTypes::Skeleton))
{
- mesh.BlendIndices.Resize(vertexCount);
- mesh.BlendWeights.Resize(vertexCount);
- mesh.BlendIndices.SetAll(Int4::Zero);
- mesh.BlendWeights.SetAll(Float4::Zero);
+ OPEN_FBX_GET_CACHE_LIST(BlendIndicesCache, blendIndices, positions.values_count);
+ OPEN_FBX_GET_CACHE_LIST(BlendWeightsCache, blendWeights, positions.values_count);
+ data.BlendIndicesCache.SetAll(Int4::Zero);
+ data.BlendWeightsCache.SetAll(Float4::Zero);
for (int clusterIndex = 0, clusterCount = skin->getClusterCount(); clusterIndex < clusterCount; clusterIndex++)
{
@@ -718,12 +872,12 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
const double* clusterWeights = cluster->getWeights();
for (int j = 0; j < cluster->getIndicesCount(); j++)
{
- int vtxIndex = clusterIndices[j] - firstVertexOffset;
+ int vtxIndex = clusterIndices[j];
float vtxWeight = (float)clusterWeights[j];
- if (vtxWeight <= 0 || vtxIndex < 0 || vtxIndex >= vertexCount)
+ if (vtxWeight <= 0 || vtxIndex < 0 || vtxIndex >= positions.values_count)
continue;
- Int4& indices = mesh.BlendIndices.Get()[vtxIndex];
- Float4& weights = mesh.BlendWeights.Get()[vtxIndex];
+ Int4& indices = blendIndices[vtxIndex];
+ Float4& weights = blendWeights[vtxIndex];
for (int32 k = 0; k < 4; k++)
{
@@ -745,6 +899,16 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
}
}
+ // Remap blend values to triangulated data
+ mesh.BlendIndices.Resize(vertexCount, false);
+ mesh.BlendWeights.Resize(vertexCount, false);
+ for (int i = 0; i < vertexCount; i++)
+ {
+ const int idx = positions.indices[triangulatedIndices[i]];
+ mesh.BlendIndices.Get()[i] = blendIndices[idx];
+ mesh.BlendWeights.Get()[i] = blendWeights[idx];
+ }
+
mesh.NormalizeBlendWeights();
}
@@ -756,44 +920,43 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
{
const ofbx::BlendShapeChannel* channel = blendShape->getBlendShapeChannel(channelIndex);
- // Use last shape
+ // Use the last shape
const int targetShapeCount = channel->getShapeCount();
if (targetShapeCount == 0)
continue;
const ofbx::Shape* shape = channel->getShape(targetShapeCount - 1);
-
- if (shape->getVertexCount() != aGeometry->getVertexCount())
+ const ofbx::Vec3* shapeVertices = shape->getVertices();
+ const ofbx::Vec3* shapeNormals = shape->getNormals();
+ const int* shapeIndices = shape->getIndices();
+ const int shapeVertexCount = shape->getVertexCount();
+ const int shapeIndexCount = shape->getIndexCount();
+ if (shapeVertexCount != shapeIndexCount)
{
- LOG(Error, "Blend shape '{0}' in mesh '{1}' has different amount of vertices ({2}) than mesh ({3})", String(shape->name), mesh.Name, shape->getVertexCount(), aGeometry->getVertexCount());
+ LOG(Error, "Blend shape '{0}' in mesh '{1}' has different amount of vertices ({2}) and indices ({3})", String(shape->name), mesh.Name, shapeVertexCount, shapeIndexCount);
continue;
}
BlendShape& blendShapeData = mesh.BlendShapes.AddOne();
blendShapeData.Name = shape->name;
blendShapeData.Weight = channel->getShapeCount() > 1 ? (float)(channel->getDeformPercent() / 100.0) : 1.0f;
+ blendShapeData.Vertices.EnsureCapacity(shapeIndexCount);
- blendShapeData.Vertices.Resize(vertexCount);
- for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++)
- blendShapeData.Vertices.Get()[i].VertexIndex = i;
-
- auto shapeVertices = shape->getVertices();
- for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++)
+ for (int32 i = 0; i < shapeIndexCount; i++)
{
- auto delta = ToFloat3(shapeVertices[i + firstVertexOffset]) - mesh.Positions.Get()[i];
- blendShapeData.Vertices.Get()[i].PositionDelta = delta;
- }
-
- auto shapeNormals = shape->getNormals();
- for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++)
- {
- auto delta = ToFloat3(shapeNormals[i + firstVertexOffset]);
- if (data.ConvertRH)
+ int shapeIndex = shapeIndices[i];
+ BlendShapeVertex v;
+ v.PositionDelta = ToFloat3(shapeVertices[i]);
+ v.NormalDelta = shapeNormals ? ToFloat3(shapeNormals[i]) : Float3::Zero;
+ for (int32 vertexIndex = 0; vertexIndex < vertexCount; vertexIndex++)
{
- // Mirror normals along the Z axis
- delta.Z *= -1.0f;
+ int sourceIndex = positions.indices[triangulatedIndices[vertexIndex]];
+ if (sourceIndex == shapeIndex)
+ {
+ // Add blend shape vertex
+ v.VertexIndex = vertexIndex;
+ blendShapeData.Vertices.Add(v);
+ }
}
- delta = delta - mesh.Normals.Get()[i];
- blendShapeData.Vertices.Get()[i].NormalDelta = delta;
}
}
}
@@ -806,7 +969,10 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
for (auto& blendShapeData : mesh.BlendShapes)
{
for (auto& v : blendShapeData.Vertices)
+ {
v.PositionDelta.Z *= -1.0f;
+ v.NormalDelta.Z *= -1.0f;
+ }
}
}
@@ -820,7 +986,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
Swap(mesh.Indices.Get()[i], mesh.Indices.Get()[i + 2]);
}
- if ((data.Options.CalculateTangents || !tangents) && mesh.UVs.HasItems())
+ if ((data.Options.CalculateTangents || !tangents.values) && mesh.UVs.HasItems())
{
if (mesh.GenerateTangents(data.Options.SmoothingTangentsAngle))
{
@@ -858,7 +1024,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
return false;
}
-bool ImportMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, String& errorMsg, int32 triangleStart, int32 triangleEnd)
+bool ImportMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, String& errorMsg, int partitionIndex)
{
PROFILE_CPU();
@@ -899,7 +1065,7 @@ bool ImportMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
// Import mesh data
MeshData* meshData = New<MeshData>();
- if (ProcessMesh(result, data, aMesh, *meshData, errorMsg, triangleStart, triangleEnd))
+ if (ProcessMesh(result, data, aMesh, *meshData, errorMsg, partitionIndex))
return true;
// Link mesh
@@ -916,36 +1082,17 @@ bool ImportMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh*
bool ImportMesh(int32 index, ModelData& result, OpenFbxImporterData& data, String& errorMsg)
{
const auto aMesh = data.Scene->getMesh(index);
- const auto aGeometry = aMesh->getGeometry();
- const auto trianglesCount = aGeometry->getVertexCount() / 3;
if (IsMeshInvalid(aMesh))
return false;
- if (aMesh->getMaterialCount() < 2 || !aGeometry->getMaterials())
+ const auto& geomData = aMesh->getGeometryData();
+ for (int i = 0; i < geomData.getPartitionCount(); i++)
{
- // Fast path if mesh is using single material for all triangles
- if (ImportMesh(result, data, aMesh, errorMsg, 0, trianglesCount - 1))
- return true;
- }
- else
- {
- // Create mesh for each sequence of triangles that share the same material
- const auto materials = aGeometry->getMaterials();
- int32 rangeStart = 0;
- int32 rangeStartVal = materials[rangeStart];
- for (int32 triangleIndex = 1; triangleIndex < trianglesCount; triangleIndex++)
- {
- if (rangeStartVal != materials[triangleIndex])
- {
- if (ImportMesh(result, data, aMesh, errorMsg, rangeStart, triangleIndex - 1))
- return true;
+ const auto& partition = geomData.getPartition(i);
+ if (partition.polygon_count == 0)
+ continue;
- // Start a new range
- rangeStart = triangleIndex;
- rangeStartVal = materials[triangleIndex];
- }
- }
- if (ImportMesh(result, data, aMesh, errorMsg, rangeStart, trianglesCount - 1))
+ if (ImportMesh(result, data, aMesh, errorMsg, i))
return true;
}
return false;
@@ -962,35 +1109,35 @@ struct AnimInfo
struct Frame
{
- ofbx::Vec3 Translation;
- ofbx::Vec3 Rotation;
- ofbx::Vec3 Scaling;
+ ofbx::DVec3 Translation;
+ ofbx::DVec3 Rotation;
+ ofbx::DVec3 Scaling;
};
-void ExtractKeyframePosition(const ofbx::Object* bone, ofbx::Vec3& trans, const Frame& localFrame, Float3& keyframe)
+void ExtractKeyframePosition(const ofbx::Object* bone, ofbx::DVec3& trans, const Frame& localFrame, Float3& keyframe)
{
const Matrix frameTrans = ToMatrix(bone->evalLocal(trans, localFrame.Rotation, localFrame.Scaling));
keyframe = frameTrans.GetTranslation();
}
-void ExtractKeyframeRotation(const ofbx::Object* bone, ofbx::Vec3& trans, const Frame& localFrame, Quaternion& keyframe)
+void ExtractKeyframeRotation(const ofbx::Object* bone, ofbx::DVec3& trans, const Frame& localFrame, Quaternion& keyframe)
{
const Matrix frameTrans = ToMatrix(bone->evalLocal(localFrame.Translation, trans, { 1.0, 1.0, 1.0 }));
Quaternion::RotationMatrix(frameTrans, keyframe);
}
-void ExtractKeyframeScale(const ofbx::Object* bone, ofbx::Vec3& trans, const Frame& localFrame, Float3& keyframe)
+void ExtractKeyframeScale(const ofbx::Object* bone, ofbx::DVec3& trans, const Frame& localFrame, Float3& keyframe)
{
// Fix empty scale case
if (Math::IsZero(trans.x) && Math::IsZero(trans.y) && Math::IsZero(trans.z))
trans = { 1.0, 1.0, 1.0 };
- const Matrix frameTrans = ToMatrix(bone->evalLocal(localFrame.Translation, localFrame.Rotation, trans));
+ const Matrix frameTrans = ToMatrix(bone->evalLocal(localFrame.Translation, { 0.0, 0.0, 0.0 }, trans));
keyframe = frameTrans.GetScaleVector();
}
template<typename T>
-void ImportCurve(const ofbx::AnimationCurveNode* curveNode, LinearCurve<T>& curve, AnimInfo& info, void (*ExtractKeyframe)(const ofbx::Object*, ofbx::Vec3&, const Frame&, T&))
+void ImportCurve(const ofbx::AnimationCurveNode* curveNode, LinearCurve<T>& curve, AnimInfo& info, void (*ExtractKeyframe)(const ofbx::Object*, ofbx::DVec3&, const Frame&, T&))
{
if (curveNode == nullptr)
return;
@@ -1008,7 +1155,7 @@ void ImportCurve(const ofbx::AnimationCurveNode* curveNode, LinearCurve& curv
key.Time = (float)i;
- ofbx::Vec3 trans = curveNode->getNodeLocalTransform(t);
+ ofbx::DVec3 trans = curveNode->getNodeLocalTransform(t);
ExtractKeyframe(bone, trans, localFrame, key.Value);
}
}
@@ -1125,21 +1272,26 @@ bool ModelTool::ImportDataOpenFBX(const String& path, ModelData& data, Options&
errorMsg = TEXT("Cannot load file.");
return true;
}
- ofbx::u64 loadFlags = 0;
+ ofbx::LoadFlags loadFlags = ofbx::LoadFlags::NONE;
if (EnumHasAnyFlags(options.ImportTypes, ImportDataTypes::Geometry))
{
- loadFlags |= (ofbx::u64)ofbx::LoadFlags::TRIANGULATE;
if (!options.ImportBlendShapes)
- loadFlags |= (ofbx::u64)ofbx::LoadFlags::IGNORE_BLEND_SHAPES;
+ loadFlags |= ofbx::LoadFlags::IGNORE_BLEND_SHAPES;
}
else
{
- loadFlags |= (ofbx::u64)ofbx::LoadFlags::IGNORE_GEOMETRY | (ofbx::u64)ofbx::LoadFlags::IGNORE_BLEND_SHAPES;
+ loadFlags |= ofbx::LoadFlags::IGNORE_GEOMETRY | ofbx::LoadFlags::IGNORE_BLEND_SHAPES;
}
+ if (EnumHasNoneFlags(options.ImportTypes, ImportDataTypes::Materials))
+ loadFlags |= ofbx::LoadFlags::IGNORE_MATERIALS;
+ if (EnumHasNoneFlags(options.ImportTypes, ImportDataTypes::Textures))
+ loadFlags |= ofbx::LoadFlags::IGNORE_TEXTURES;
+ if (EnumHasNoneFlags(options.ImportTypes, ImportDataTypes::Animations))
+ loadFlags |= ofbx::LoadFlags::IGNORE_ANIMATIONS;
ofbx::IScene* scene;
{
PROFILE_CPU_NAMED("ofbx::load");
- scene = ofbx::load(fileData.Get(), fileData.Count(), loadFlags);
+ scene = ofbx::load(fileData.Get(), fileData.Count(), (ofbx::u16)loadFlags);
}
if (!scene)
{
diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp
index b71267e4c..29482c3f6 100644
--- a/Source/Engine/Tools/ModelTool/ModelTool.cpp
+++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp
@@ -1349,31 +1349,31 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option
mesh->BlendIndices.SetAll(indices);
mesh->BlendWeights.SetAll(weights);
}
-#if BUILD_DEBUG
else
{
auto& indices = mesh->BlendIndices;
for (int32 j = 0; j < indices.Count(); j++)
{
- const int32 min = indices[j].MinValue();
- const int32 max = indices[j].MaxValue();
+ const Int4 ij = indices.Get()[j];
+ const int32 min = ij.MinValue();
+ const int32 max = ij.MaxValue();
if (min < 0 || max >= data.Skeleton.Bones.Count())
{
LOG(Warning, "Imported mesh \'{0}\' has invalid blend indices. It may result in invalid rendering.", mesh->Name);
+ break;
}
}
-
auto& weights = mesh->BlendWeights;
for (int32 j = 0; j < weights.Count(); j++)
{
- const float sum = weights[j].SumValues();
+ const float sum = weights.Get()[j].SumValues();
if (Math::Abs(sum - 1.0f) > ZeroTolerance)
{
LOG(Warning, "Imported mesh \'{0}\' has invalid blend weights. It may result in invalid rendering.", mesh->Name);
+ break;
}
}
}
-#endif
}
}
if (EnumHasAnyFlags(options.ImportTypes, ImportDataTypes::Animations))
diff --git a/Source/ThirdParty/OpenFBX/libdeflate.cpp b/Source/ThirdParty/OpenFBX/libdeflate.cpp
new file mode 100644
index 000000000..2e2d5355d
--- /dev/null
+++ b/Source/ThirdParty/OpenFBX/libdeflate.cpp
@@ -0,0 +1,4193 @@
+// ofbx changes : removed unused code, single .h and .c
+/*
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * This is a highly optimized DEFLATE decompressor. It is much faster than
+ * vanilla zlib, typically well over twice as fast, though results vary by CPU.
+ *
+ * Why this is faster than vanilla zlib:
+ *
+ * - Word accesses rather than byte accesses when reading input
+ * - Word accesses rather than byte accesses when copying matches
+ * - Faster Huffman decoding combined with various DEFLATE-specific tricks
+ * - Larger bitbuffer variable that doesn't need to be refilled as often
+ * - Other optimizations to remove unnecessary branches
+ * - Only full-buffer decompression is supported, so the code doesn't need to
+ * support stopping and resuming decompression.
+ * - On x86_64, a version of the decompression routine is compiled with BMI2
+ * instructions enabled and is used automatically at runtime when supported.
+ */
+
+/*
+ * lib_common.h - internal header included by all library code
+ */
+
+#ifndef LIB_LIB_COMMON_H
+#define LIB_LIB_COMMON_H
+
+#ifdef LIBDEFLATE_H
+ /*
+ * When building the library, LIBDEFLATEAPI needs to be defined properly before
+ * including libdeflate.h.
+ */
+# error "lib_common.h must always be included before libdeflate.h"
+#endif
+
+#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
+# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
+#elif defined(__GNUC__)
+# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
+#else
+# define LIBDEFLATE_EXPORT_SYM
+#endif
+
+/*
+ * On i386, gcc assumes that the stack is 16-byte aligned at function entry.
+ * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
+ * only guarantee 4-byte alignment when calling functions. This is mainly an
+ * issue on Windows, but it has been seen on Linux too. Work around this ABI
+ * incompatibility by realigning the stack pointer when entering libdeflate.
+ * This prevents crashes in SSE/AVX code.
+ */
+#if defined(__GNUC__) && defined(__i386__)
+# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
+#else
+# define LIBDEFLATE_ALIGN_STACK
+#endif
+
+#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
+
+/*
+ * common_defs.h
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef COMMON_DEFS_H
+#define COMMON_DEFS_H
+
+#include "libdeflate.h"
+
+#include <stdbool.h> /* for bool, true and false */
+#include <stddef.h> /* for size_t */
+#include <stdint.h> /* for the fixed-width integer types */
+#ifdef _MSC_VER
+# include <intrin.h> /* for _BitScan*() and other intrinsics */
+# include <stdlib.h> /* for _byteswap_*() */
+ /* Disable MSVC warnings that are expected. */
+ /* /W2 */
+# pragma warning(disable : 4146) /* unary minus on unsigned type */
+ /* /W3 */
+# pragma warning(disable : 4018) /* signed/unsigned mismatch */
+# pragma warning(disable : 4244) /* possible loss of data */
+# pragma warning(disable : 4267) /* possible loss of precision */
+# pragma warning(disable : 4310) /* cast truncates constant value */
+ /* /W4 */
+# pragma warning(disable : 4100) /* unreferenced formal parameter */
+# pragma warning(disable : 4127) /* conditional expression is constant */
+# pragma warning(disable : 4189) /* local variable initialized but not referenced */
+# pragma warning(disable : 4232) /* nonstandard extension used */
+# pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */
+# pragma warning(disable : 4295) /* array too small to include terminating null */
+#endif
+#ifndef FREESTANDING
+# include <string.h> /* for memcpy() */
+#endif
+
+/* ========================================================================== */
+/* Target architecture */
+/* ========================================================================== */
+
+/* Define at most one compiler-independent ARCH_* macro; none is set for any other target. */
+#undef ARCH_X86_64
+#undef ARCH_X86_32
+#undef ARCH_ARM64
+#undef ARCH_ARM32
+#ifdef _MSC_VER /* MSVC: test the _M_* target macros */
+# if defined(_M_X64)
+# define ARCH_X86_64
+# elif defined(_M_IX86)
+# define ARCH_X86_32
+# elif defined(_M_ARM64)
+# define ARCH_ARM64
+# elif defined(_M_ARM)
+# define ARCH_ARM32
+# endif
+#else /* every other compiler: test the gcc-style target macros */
+# if defined(__x86_64__)
+# define ARCH_X86_64
+# elif defined(__i386__)
+# define ARCH_X86_32
+# elif defined(__aarch64__)
+# define ARCH_ARM64
+# elif defined(__arm__)
+# define ARCH_ARM32
+# endif
+#endif
+
+/* ========================================================================== */
+/* Type definitions */
+/* ========================================================================== */
+
+/* Fixed-width integer types: short aliases (uN = unsigned, sN = signed, N bits) */
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+/* ssize_t, if not available in <sys/types.h> (defined here for MSVC builds only) */
+#ifdef _MSC_VER
+# ifdef _WIN64
+ typedef long long ssize_t; /* 64-bit Windows target */
+# else
+ typedef long ssize_t; /* any other MSVC target */
+# endif
+#endif
+
+/*
+ * Word type of the target architecture. Use 'size_t' instead of
+ * 'unsigned long' to account for platforms such as Windows that use 32-bit
+ * 'unsigned long' on 64-bit architectures.
+ */
+typedef size_t machine_word_t;
+
+/* Number of bytes in a word (as an int, not a size_t) */
+#define WORDBYTES ((int)sizeof(machine_word_t))
+
+/* Number of bits in a word (computed as 8 bits per byte) */
+#define WORDBITS (8 * WORDBYTES)
+
+/* ========================================================================== */
+/* Optional compiler features */
+/* ========================================================================== */
+
+/* Compiler version checks. Only use when absolutely necessary. */
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+# define GCC_PREREQ(major, minor) \
+ (__GNUC__ > (major) || \
+ (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
+#else
+# define GCC_PREREQ(major, minor) 0
+#endif
+#ifdef __clang__
+# ifdef __apple_build_version__
+# define CLANG_PREREQ(major, minor, apple_version) \
+ (__apple_build_version__ >= (apple_version))
+# else
+# define CLANG_PREREQ(major, minor, apple_version) \
+ (__clang_major__ > (major) || \
+ (__clang_major__ == (major) && __clang_minor__ >= (minor)))
+# endif
+#else
+# define CLANG_PREREQ(major, minor, apple_version) 0
+#endif
+
+/*
+ * Macros to check for compiler support for attributes and builtins. clang
+ * implements these macros, but gcc doesn't, so generally any use of one of
+ * these macros must also be combined with a gcc version check.
+ */
+#ifndef __has_attribute
+# define __has_attribute(attribute) 0
+#endif
+#ifndef __has_builtin
+# define __has_builtin(builtin) 0
+#endif
+
+/* inline - suggest that a function be inlined */
+#ifdef _MSC_VER
+# define inline __inline
+#endif /* else assume 'inline' is usable as-is */
+
+/* forceinline - force a function to be inlined, if possible */
+#if defined(__GNUC__) || __has_attribute(always_inline)
+# define forceinline inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+# define forceinline __forceinline
+#else
+# define forceinline inline
+#endif
+
+/* MAYBE_UNUSED - mark a function or variable as maybe unused */
+#if defined(__GNUC__) || __has_attribute(unused)
+# define MAYBE_UNUSED __attribute__((unused))
+#else
+# define MAYBE_UNUSED
+#endif
+
+/*
+ * restrict - hint that writes only occur through the given pointer.
+ *
+ * Don't use MSVC's __restrict, since it has nonstandard behavior.
+ * Standard restrict is okay, if it is supported.
+ */
+#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
+# if defined(__GNUC__) || defined(__clang__)
+# define restrict __restrict__
+# else
+# define restrict
+# endif
+#endif /* else assume 'restrict' is usable as-is */
+
+/* likely(expr) - hint that an expression is usually true */
+#if defined(__GNUC__) || __has_builtin(__builtin_expect)
+# define likely(expr) __builtin_expect(!!(expr), 1)
+#else
+# define likely(expr) (expr)
+#endif
+
+/* unlikely(expr) - hint that an expression is usually false */
+#if defined(__GNUC__) || __has_builtin(__builtin_expect)
+# define unlikely(expr) __builtin_expect(!!(expr), 0)
+#else
+# define unlikely(expr) (expr)
+#endif
+
+/* prefetchr(addr) - prefetch into L1 cache for read */
+#undef prefetchr
+#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
+# define prefetchr(addr) __builtin_prefetch((addr), 0)
+#elif defined(_MSC_VER)
+# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
+# elif defined(ARCH_ARM64)
+# define prefetchr(addr) __prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
+# elif defined(ARCH_ARM32)
+# define prefetchr(addr) __prefetch(addr)
+# endif
+#endif
+#ifndef prefetchr
+# define prefetchr(addr)
+#endif
+
+/* prefetchw(addr) - prefetch into L1 cache for write */
+#undef prefetchw
+#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
+# define prefetchw(addr) __builtin_prefetch((addr), 1)
+#elif defined(_MSC_VER)
+# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+# define prefetchw(addr) _m_prefetchw(addr)
+# elif defined(ARCH_ARM64)
+# define prefetchw(addr) __prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
+# elif defined(ARCH_ARM32)
+# define prefetchw(addr) __prefetchw(addr)
+# endif
+#endif
+#ifndef prefetchw
+# define prefetchw(addr)
+#endif
+
+/*
+ * _aligned_attribute(n) - declare that the annotated variable, or variables of
+ * the annotated type, must be aligned on n-byte boundaries.
+ */
+#undef _aligned_attribute
+#if defined(__GNUC__) || __has_attribute(aligned)
+# define _aligned_attribute(n) __attribute__((aligned(n))) /* gcc-style attribute */
+#elif defined(_MSC_VER)
+# define _aligned_attribute(n) __declspec(align(n)) /* MSVC declspec */
+#endif /* no fallback: the macro stays undefined on any other compiler */
+
+/*
+ * _target_attribute(attrs) - override the compilation target for a function.
+ *
+ * This accepts one or more comma-separated suffixes to the -m prefix jointly
+ * forming the name of a machine-dependent option. On gcc-like compilers, this
+ * enables codegen for the given targets, including arbitrary compiler-generated
+ * code as well as the corresponding intrinsics. On other compilers this macro
+ * expands to nothing, though MSVC allows intrinsics to be used anywhere anyway.
+ */
+#if GCC_PREREQ(4, 4) || __has_attribute(target)
+# define _target_attribute(attrs) __attribute__((target(attrs)))
+# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 1
+#else
+# define _target_attribute(attrs)
+# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0
+#endif
+
+/* ========================================================================== */
+/* Miscellaneous macros */
+/* ========================================================================== */
+
+#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0])) /* element count; A must be an array, not a pointer */
+#define MIN(a, b) ((a) <= (b) ? (a) : (b)) /* smaller value; arguments may be evaluated twice */
+#define MAX(a, b) ((a) >= (b) ? (a) : (b)) /* larger value; arguments may be evaluated twice */
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) /* n / d rounded up (non-negative n, positive d) */
+#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) /* array size is -1 when expr is false */
+#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) /* round n up to a multiple of a; mask needs a power-of-two a */
+#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d))) /* round n up to a multiple of d; any positive d */
+
+/* ========================================================================== */
+/* Endianness handling */
+/* ========================================================================== */
+
+/*
+ * CPU_IS_LITTLE_ENDIAN() - evaluates to a nonzero value on a little-endian
+ * CPU and to zero on a big-endian one.  The first two variants are plain
+ * macros; the last is a generic runtime probe that cannot be used in an #if
+ * but should still be optimized out by the compiler.
+ */
+#if defined(__BYTE_ORDER__) /* gcc v4.6+ and clang */
+# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#elif defined(_MSC_VER)
+# define CPU_IS_LITTLE_ENDIAN() true
+#else
+static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
+{
+    /* Store 1 in a word, then look at the byte at its lowest address. */
+    union {
+        u32 word;
+        u8 first_byte;
+    } probe;
+    probe.word = 1;
+    return probe.first_byte;
+}
+#endif
+
+/* bswap16(v) - return v with its two bytes exchanged */
+static forceinline u16 bswap16(u16 v)
+{
+#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
+    return __builtin_bswap16(v);        /* gcc/clang builtin */
+#elif defined(_MSC_VER)
+    return _byteswap_ushort(v);         /* MSVC runtime routine */
+#else
+    return (u16)((v >> 8) | (v << 8));  /* portable: exchange the two halves */
+#endif
+}
+
+/* bswap32(v) - return v with its four bytes in reverse order */
+static forceinline u32 bswap32(u32 v)
+{
+#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
+    return __builtin_bswap32(v);        /* gcc/clang builtin */
+#elif defined(_MSC_VER)
+    return _byteswap_ulong(v);          /* MSVC runtime routine */
+#else
+    return (v << 24) |                  /* portable: shift first, then mask */
+           ((v << 8) & 0x00FF0000) |
+           ((v >> 8) & 0x0000FF00) |
+           (v >> 24);
+#endif
+}
+
+/* bswap64(v) - return v with its eight bytes in reverse order */
+static forceinline u64 bswap64(u64 v)
+{
+#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
+    return __builtin_bswap64(v);        /* gcc/clang builtin */
+#elif defined(_MSC_VER)
+    return _byteswap_uint64(v);         /* MSVC runtime routine */
+#else
+    return (v << 56) |                  /* portable: shift first, then mask */
+           ((v << 40) & 0x00FF000000000000) |
+           ((v << 24) & 0x0000FF0000000000) |
+           ((v << 8) & 0x000000FF00000000) |
+           ((v >> 8) & 0x00000000FF000000) |
+           ((v >> 24) & 0x0000000000FF0000) |
+           ((v >> 40) & 0x000000000000FF00) |
+           (v >> 56);
+#endif
+}
+
+#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v)) /* 16-bit: CPU order <-> little endian */
+#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v)) /* 32-bit: CPU order <-> little endian */
+#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v)) /* 64-bit: CPU order <-> little endian */
+#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v)) /* 16-bit: CPU order <-> big endian */
+#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v)) /* 32-bit: CPU order <-> big endian */
+#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v)) /* 64-bit: CPU order <-> big endian */
+
+/* ========================================================================== */
+/* Unaligned memory accesses */
+/* ========================================================================== */
+
+/*
+ * UNALIGNED_ACCESS_IS_FAST - object-like macro: 1 if unaligned memory accesses
+ * can be performed efficiently on the target platform, otherwise 0.
+ */
+#if (defined(__GNUC__) || defined(__clang__)) && \
+ (defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
+ defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
+ /*
+ * For all compilation purposes, WebAssembly behaves like any other CPU
+ * instruction set. Even though a WebAssembly engine might be running on
+ * top of different actual CPU architectures, the WebAssembly spec
+ * itself permits unaligned access and it will be fast on most of those
+ * platforms, and simulated at the engine level on others, so it's
+ * worth treating it as a CPU architecture with fast unaligned access.
+ */ defined(__wasm__))
+# define UNALIGNED_ACCESS_IS_FAST 1
+#elif defined(_MSC_VER)
+# define UNALIGNED_ACCESS_IS_FAST 1
+#else
+# define UNALIGNED_ACCESS_IS_FAST 0
+#endif
+
+/*
+ * Implementing unaligned memory accesses using memcpy() is portable, and it
+ * usually gets optimized appropriately by modern compilers. I.e., each
+ * memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled to a load or store
+ * instruction, not to an actual function call.
+ *
+ * We no longer use the "packed struct" approach to unaligned accesses, as that
+ * is nonstandard, has unclear semantics, and doesn't receive enough testing
+ * (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
+ *
+ * arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
+ * where memcpy() generates inefficient code
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). However, we no longer
+ * consider that one case important enough to maintain different code for.
+ * If you run into it, please just use a newer version of gcc (or use clang).
+ */
+
+#ifdef FREESTANDING
+# define MEMCOPY __builtin_memcpy /* FREESTANDING: the memcpy() header is not included, use the builtin */
+#else
+# define MEMCOPY memcpy
+#endif
+
+/* Unaligned loads and stores without endianness conversion (values stay in CPU byte order) */
+
+#define DEFINE_UNALIGNED_TYPE(type) \
+static forceinline type \
+load_##type##_unaligned(const void *p) \
+{ \
+ type v; \
+ \
+ MEMCOPY(&v, p, sizeof(v)); \
+ return v; \
+} \
+ \
+static forceinline void \
+store_##type##_unaligned(type v, void *p) \
+{ \
+ MEMCOPY(p, &v, sizeof(v)); \
+}
+
+DEFINE_UNALIGNED_TYPE(u16) /* load_u16_unaligned(), store_u16_unaligned() */
+DEFINE_UNALIGNED_TYPE(u32) /* load_u32_unaligned(), store_u32_unaligned() */
+DEFINE_UNALIGNED_TYPE(u64) /* load_u64_unaligned(), store_u64_unaligned() */
+DEFINE_UNALIGNED_TYPE(machine_word_t) /* load_/store_machine_word_t_unaligned() */
+
+#undef MEMCOPY
+
+#define load_word_unaligned load_machine_word_t_unaligned /* shorter alias */
+#define store_word_unaligned store_machine_word_t_unaligned /* shorter alias */
+
+/* Unaligned loads with endianness conversion */
+
+static forceinline u16
+get_unaligned_le16(const u8 *p)
+{
+    /* Byte-wise fallback: p[0] holds the least significant byte. */
+    if (!UNALIGNED_ACCESS_IS_FAST)
+        return p[0] | ((u16)p[1] << 8);
+    return le16_bswap(load_u16_unaligned(p));
+}
+
+static forceinline u16
+get_unaligned_be16(const u8 *p)
+{
+    /* Byte-wise fallback: p[0] holds the most significant byte. */
+    if (!UNALIGNED_ACCESS_IS_FAST)
+        return p[1] | ((u16)p[0] << 8);
+    return be16_bswap(load_u16_unaligned(p));
+}
+
+static forceinline u32
+get_unaligned_le32(const u8 *p)
+{
+    /* Byte-wise fallback: assemble from the least significant byte up. */
+    if (!UNALIGNED_ACCESS_IS_FAST)
+        return p[0] | ((u32)p[1] << 8) |
+               ((u32)p[2] << 16) | ((u32)p[3] << 24);
+    return le32_bswap(load_u32_unaligned(p));
+}
+
+static forceinline u32
+get_unaligned_be32(const u8 *p)
+{
+    /* Byte-wise fallback: p[0] holds the most significant byte. */
+    if (!UNALIGNED_ACCESS_IS_FAST)
+        return p[3] | ((u32)p[2] << 8) |
+               ((u32)p[1] << 16) | ((u32)p[0] << 24);
+    return be32_bswap(load_u32_unaligned(p));
+}
+
+static forceinline u64
+get_unaligned_le64(const u8 *p)
+{
+	if (UNALIGNED_ACCESS_IS_FAST)
+		return le64_bswap(load_u64_unaligned(p));
+	/* Otherwise assemble the value byte by byte; p[0] is the low byte. */
+	return p[0] | ((u64)p[1] << 8) |
+	       ((u64)p[2] << 16) | ((u64)p[3] << 24) |
+	       ((u64)p[4] << 32) | ((u64)p[5] << 40) |
+	       ((u64)p[6] << 48) | ((u64)p[7] << 56);
+}
+
+static forceinline machine_word_t
+get_unaligned_leword(const u8 *p)
+{
+	/* Load one little-endian machine word; only 32 and 64 bits exist. */
+	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+	if (WORDBITS != 32)
+		return get_unaligned_le64(p);
+	return get_unaligned_le32(p);
+}
+
+/* Unaligned stores with endianness conversion */
+
+static forceinline void
+put_unaligned_le16(u16 v, u8 *p)
+{
+	if (UNALIGNED_ACCESS_IS_FAST) {
+		store_u16_unaligned(le16_bswap(v), p);
+		return;
+	}
+	p[0] = (u8)v;		/* least significant byte first */
+	p[1] = (u8)(v >> 8);
+}
+
+static forceinline void
+put_unaligned_be16(u16 v, u8 *p)
+{
+	if (UNALIGNED_ACCESS_IS_FAST) {
+		store_u16_unaligned(be16_bswap(v), p);
+		return;
+	}
+	p[0] = (u8)(v >> 8);	/* most significant byte first */
+	p[1] = (u8)v;
+}
+
+static forceinline void
+put_unaligned_le32(u32 v, u8 *p)
+{
+	if (UNALIGNED_ACCESS_IS_FAST) {
+		store_u32_unaligned(le32_bswap(v), p);
+		return;
+	}
+	p[0] = (u8)v;		/* least significant byte first */
+	p[1] = (u8)(v >> 8);
+	p[2] = (u8)(v >> 16);
+	p[3] = (u8)(v >> 24);
+}
+
+static forceinline void
+put_unaligned_be32(u32 v, u8 *p)
+{
+	if (UNALIGNED_ACCESS_IS_FAST) {
+		store_u32_unaligned(be32_bswap(v), p);
+		return;
+	}
+	p[0] = (u8)(v >> 24);	/* most significant byte first */
+	p[1] = (u8)(v >> 16);
+	p[2] = (u8)(v >> 8);
+	p[3] = (u8)v;
+}
+
+static forceinline void
+put_unaligned_le64(u64 v, u8 *p)
+{
+	if (UNALIGNED_ACCESS_IS_FAST) {
+		store_u64_unaligned(le64_bswap(v), p);
+		return;
+	}
+	p[0] = (u8)v;		/* least significant byte first */
+	p[1] = (u8)(v >> 8);
+	p[2] = (u8)(v >> 16);
+	p[3] = (u8)(v >> 24);
+	p[4] = (u8)(v >> 32);
+	p[5] = (u8)(v >> 40);
+	p[6] = (u8)(v >> 48);
+	p[7] = (u8)(v >> 56);
+}
+
+static forceinline void
+put_unaligned_leword(machine_word_t v, u8 *p)
+{
+	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+	if (WORDBITS != 32)	/* store a full 64-bit word */
+		put_unaligned_le64(v, p);
+	else
+		put_unaligned_le32(v, p);
+}
+
+/* ========================================================================== */
+/* Bit manipulation functions */
+/* ========================================================================== */
+
+/*
+ * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
+ * significant end) of the *most* significant 1 bit in the input value. The
+ * input value must be nonzero!
+ */
+
+static forceinline unsigned
+bsr32(u32 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_clz)
+	return 31 - __builtin_clz(v);
+#elif defined(_MSC_VER)
+	unsigned long pos;
+
+	_BitScanReverse(&pos, v);
+	return pos;
+#else
+	unsigned pos;
+	/* Portable fallback: count the right shifts that leave v nonzero. */
+	for (pos = 0; (v >>= 1) != 0; pos++)
+		;
+	return pos;
+#endif
+}
+
+static forceinline unsigned
+bsr64(u64 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
+	return 63 - __builtin_clzll(v);
+#elif defined(_MSC_VER) && defined(_WIN64)
+	unsigned long pos;
+
+	_BitScanReverse64(&pos, v);
+	return pos;
+#else
+	unsigned pos;
+	/* Portable fallback: count the right shifts that leave v nonzero. */
+	for (pos = 0; (v >>= 1) != 0; pos++)
+		;
+	return pos;
+#endif
+}
+
+static forceinline unsigned
+bsrw(machine_word_t v)
+{
+	/* Forward to the variant that matches the machine word width. */
+	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+	if (WORDBITS != 32)
+		return bsr64(v);
+	return bsr32(v);
+}
+
+/*
+ * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
+ * significant end) of the *least* significant 1 bit in the input value. The
+ * input value must be nonzero!
+ */
+
+static forceinline unsigned
+bsf32(u32 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
+	return __builtin_ctz(v);
+#elif defined(_MSC_VER)
+	unsigned long pos;
+
+	_BitScanForward(&pos, v);
+	return pos;
+#else
+	unsigned pos;
+	/* Portable fallback: shift right until the lowest bit is set. */
+	for (pos = 0; (v & 1) == 0; pos++)
+		v >>= 1;
+	return pos;
+#endif
+}
+
+static forceinline unsigned
+bsf64(u64 v)
+{
+#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
+	return __builtin_ctzll(v);
+#elif defined(_MSC_VER) && defined(_WIN64)
+	unsigned long pos;
+
+	_BitScanForward64(&pos, v);
+	return pos;
+#else
+	unsigned pos;
+	/* Portable fallback: shift right until the lowest bit is set. */
+	for (pos = 0; (v & 1) == 0; pos++)
+		v >>= 1;
+	return pos;
+#endif
+}
+
+static forceinline unsigned
+bsfw(machine_word_t v)
+{
+	/* Forward to the variant that matches the machine word width. */
+	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
+	if (WORDBITS != 32)
+		return bsf64(v);
+	return bsf32(v);
+}
+
+/*
+ * rbit32(v): reverse the bits in a 32-bit integer. This doesn't have a
+ * fallback implementation; use '#ifdef rbit32' to check if this is available.
+ */
+#undef rbit32
+#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
+ (__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
+static forceinline u32
+rbit32(u32 v)
+{
+ __asm__("rbit %0, %1" : "=r" (v) : "r" (v)); /* 'v' is both input and output */
+ return v;
+}
+#define rbit32 rbit32 /* makes '#ifdef rbit32' true for callers */
+#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
+static forceinline u32
+rbit32(u32 v)
+{
+ __asm__("rbit %w0, %w1" : "=r" (v) : "r" (v)); /* 'v' is both input and output */
+ return v;
+}
+#define rbit32 rbit32 /* makes '#ifdef rbit32' true for callers */
+#endif /* rbit32 variants */
+
+#endif /* COMMON_DEFS_H */
+
+
+typedef void *(*malloc_func_t)(size_t); /* allocation callback: size in, pointer out */
+typedef void (*free_func_t)(void *); /* deallocation callback */
+/* Default allocator callbacks: declared here, defined elsewhere. */
+extern malloc_func_t libdeflate_default_malloc_func;
+extern free_func_t libdeflate_default_free_func;
+/* NOTE(review): presumably these allocate/free through the passed callback -- confirm. */
+void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
+ size_t alignment, size_t size);
+void libdeflate_aligned_free(free_func_t free_func, void *ptr);
+
+#ifdef FREESTANDING
+/*
+ * With -ffreestanding, <string.h> may be missing, and we must provide
+ * implementations of memset(), memcpy(), memmove(), and memcmp().
+ * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
+ *
+ * Also, -ffreestanding disables interpreting calls to these functions as
+ * built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
+ * not be optimized to a single load instruction. For performance reasons we
+ * don't want that. So, declare these functions as macros that expand to the
+ * corresponding built-ins. This approach is recommended in the gcc man page.
+ * We still need the actual function definitions in case gcc calls them.
+ */
+void *memset(void *s, int c, size_t n);
+#define memset(s, c, n) __builtin_memset((s), (c), (n))
+
+void *memcpy(void *dest, const void *src, size_t n);
+#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
+
+void *memmove(void *dest, const void *src, size_t n);
+#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
+
+int memcmp(const void *s1, const void *s2, size_t n);
+#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
+
+#undef LIBDEFLATE_ENABLE_ASSERTIONS
+#else /* !FREESTANDING */
+#include <string.h> /* memset(), memcpy(), memmove(), memcmp() */
+#endif /* FREESTANDING */
+
+/*
+ * Runtime assertion support. Don't enable this in production builds; it may
+ * hurt performance significantly.
+ */
+#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
+void libdeflate_assertion_failed(const char *expr, const char *file, int line);
+#define ASSERT(expr) { if (unlikely(!(expr))) \
+ libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
+#else
+#define ASSERT(expr) (void)(expr)
+#endif
+
+#define CONCAT_IMPL(a, b) a##b /* pastes the arguments exactly as given */
+#define CONCAT(a, b) CONCAT_IMPL(a, b) /* extra level so a and b are macro-expanded first */
+#define ADD_SUFFIX(name) CONCAT(name, SUFFIX) /* appends whatever SUFFIX is defined as */
+
+#endif /* LIB_LIB_COMMON_H */
+
+/*
+ * deflate_constants.h - constants for the DEFLATE compression format
+ */
+
+#ifndef LIB_DEFLATE_CONSTANTS_H
+#define LIB_DEFLATE_CONSTANTS_H
+
+/* Valid block types */
+#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
+#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
+#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
+
+/* Minimum and maximum supported match lengths (in bytes) */
+#define DEFLATE_MIN_MATCH_LEN 3
+#define DEFLATE_MAX_MATCH_LEN 258
+
+/* Maximum supported match offset (in bytes); equals 1 << DEFLATE_WINDOW_ORDER */
+#define DEFLATE_MAX_MATCH_OFFSET 32768
+
+/* log2 of DEFLATE_MAX_MATCH_OFFSET */
+#define DEFLATE_WINDOW_ORDER 15
+
+/* Number of symbols in each Huffman code. Note: for the literal/length
+ * and offset codes, these are actually the maximum values; a given block
+ * might use fewer symbols. */
+#define DEFLATE_NUM_PRECODE_SYMS 19
+#define DEFLATE_NUM_LITLEN_SYMS 288
+#define DEFLATE_NUM_OFFSET_SYMS 32
+
+/* The maximum number of symbols across all codes (== DEFLATE_NUM_LITLEN_SYMS) */
+#define DEFLATE_MAX_NUM_SYMS 288
+
+/* Division of symbols in the literal/length code */
+#define DEFLATE_NUM_LITERALS 256
+#define DEFLATE_END_OF_BLOCK 256 /* the symbol right after the literals */
+#define DEFLATE_FIRST_LEN_SYM 257 /* the symbol right after end-of-block */
+
+/* Maximum codeword length, in bits, within each Huffman code */
+#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
+#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
+#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
+
+/* The maximum codeword length across all codes (the litlen/offset maximum) */
+#define DEFLATE_MAX_CODEWORD_LEN 15
+
+/* Maximum possible overrun when decoding codeword lengths */
+#define DEFLATE_MAX_LENS_OVERRUN 137
+
+/*
+ * Maximum number of extra bits that may be required to represent a match
+ * length or offset.
+ */
+#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
+#define DEFLATE_MAX_EXTRA_OFFSET_BITS 13
+
+#endif /* LIB_DEFLATE_CONSTANTS_H */
+
+/*
+ * cpu_features_common.h - code shared by all lib/$arch/cpu_features.c
+ *
+ * Copyright 2020 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_CPU_FEATURES_COMMON_H
+#define LIB_CPU_FEATURES_COMMON_H
+
+#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
+ /* for strdup() and strtok_r() */
+# undef _ANSI_SOURCE
+# ifndef __APPLE__
+# undef _GNU_SOURCE
+# define _GNU_SOURCE
+# endif
+# include <stdio.h> /* fprintf() */
+# include <stdlib.h> /* getenv(), abort(), free() */
+# include <string.h> /* strdup(), strtok_r(), strcmp() */
+#endif
+
+struct cpu_feature {
+ u32 bit; /* mask cleared from the feature word when this feature is disabled */
+ const char *name; /* name compared against entries of the disable list */
+};
+
+#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
+/* Clear each feature bit named in $LIBDEFLATE_DISABLE_CPU_FEATURES. */
+static inline void
+disable_cpu_features_for_testing(u32 *features,
+				 const struct cpu_feature *feature_table,
+				 size_t feature_table_length)
+{
+	const char *spec = getenv("LIBDEFLATE_DISABLE_CPU_FEATURES");
+	char *copy, *token, *state = NULL;
+	size_t idx;
+
+	if (spec == NULL)
+		return;
+	copy = strdup(spec);	/* tokenize a private copy, not the environment */
+	if (copy == NULL)
+		abort();
+	/* Visit each comma-separated name; an unknown name is fatal. */
+	for (token = strtok_r(copy, ",", &state); token != NULL;
+	     token = strtok_r(NULL, ",", &state)) {
+		for (idx = 0; idx < feature_table_length; idx++) {
+			if (strcmp(token, feature_table[idx].name) == 0)
+				break;
+		}
+		if (idx == feature_table_length) {
+			fprintf(stderr,
+				"unrecognized feature in LIBDEFLATE_DISABLE_CPU_FEATURES: \"%s\"\n",
+				token);
+			abort();
+		}
+		/* Known name: drop that feature from the caller's mask. */
+		*features &= ~feature_table[idx].bit;
+	}
+	free(copy);
+}
+#else /* TEST_SUPPORT__DO_NOT_USE */
+static inline void
+disable_cpu_features_for_testing(u32 *features,
+				 const struct cpu_feature *feature_table,
+				 size_t feature_table_length)
+{
+}
+#endif /* !TEST_SUPPORT__DO_NOT_USE */
+
+#endif /* LIB_CPU_FEATURES_COMMON_H */
+
+/*
+ * x86/cpu_features.h - feature detection for x86 CPUs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIB_X86_CPU_FEATURES_H
+#define LIB_X86_CPU_FEATURES_H
+
+#define HAVE_DYNAMIC_X86_CPU_FEATURES 0
+
+#if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+
+#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE || defined(_MSC_VER)
+# undef HAVE_DYNAMIC_X86_CPU_FEATURES
+# define HAVE_DYNAMIC_X86_CPU_FEATURES 1
+#endif
+/* One bit per optional x86 feature; tested by the HAVE_*() macros below. */
+#define X86_CPU_FEATURE_SSE2 0x00000001
+#define X86_CPU_FEATURE_PCLMUL 0x00000002
+#define X86_CPU_FEATURE_AVX 0x00000004
+#define X86_CPU_FEATURE_AVX2 0x00000008
+#define X86_CPU_FEATURE_BMI2 0x00000010
+/* Nonzero if the feature is enabled at compile time or its bit is set in 'features'. */
+#define HAVE_SSE2(features) (HAVE_SSE2_NATIVE || ((features) & X86_CPU_FEATURE_SSE2))
+#define HAVE_PCLMUL(features) (HAVE_PCLMUL_NATIVE || ((features) & X86_CPU_FEATURE_PCLMUL))
+#define HAVE_AVX(features) (HAVE_AVX_NATIVE || ((features) & X86_CPU_FEATURE_AVX))
+#define HAVE_AVX2(features) (HAVE_AVX2_NATIVE || ((features) & X86_CPU_FEATURE_AVX2))
+#define HAVE_BMI2(features) (HAVE_BMI2_NATIVE || ((features) & X86_CPU_FEATURE_BMI2))
+
+#if HAVE_DYNAMIC_X86_CPU_FEATURES
+#define X86_CPU_FEATURES_KNOWN 0x80000000
+extern volatile u32 libdeflate_x86_cpu_features;
+/* NOTE(review): presumably init sets X86_CPU_FEATURES_KNOWN so the mask becomes nonzero -- confirm. */
+void libdeflate_init_x86_cpu_features(void);
+/* Returns the feature mask, calling the init routine first while the mask reads 0. */
+static inline u32 get_x86_cpu_features(void)
+{
+ if (libdeflate_x86_cpu_features == 0)
+ libdeflate_init_x86_cpu_features();
+ return libdeflate_x86_cpu_features;
+}
+#else /* HAVE_DYNAMIC_X86_CPU_FEATURES */
+static inline u32 get_x86_cpu_features(void) { return 0; } /* no runtime detection */
+#endif /* !HAVE_DYNAMIC_X86_CPU_FEATURES */
+
+/*
+ * Prior to gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics not
+ * available in the main target couldn't be used in 'target' attribute
+ * functions. Unfortunately clang has no feature test macro for this, so we
+ * have to check its version.
+ */
+#if HAVE_DYNAMIC_X86_CPU_FEATURES && \
+ (GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000) || defined(_MSC_VER))
+# define HAVE_TARGET_INTRINSICS 1 /* intrinsics usable in 'target' attribute functions */
+#else
+# define HAVE_TARGET_INTRINSICS 0
+#endif /* HAVE_TARGET_INTRINSICS */
+
+/* SSE2: *_NATIVE is set when the whole build targets it; *_INTRIN adds the 'target' function case */
+#if defined(__SSE2__) || \
+ (defined(_MSC_VER) && \
+ (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
+# define HAVE_SSE2_NATIVE 1
+#else
+# define HAVE_SSE2_NATIVE 0
+#endif /* HAVE_SSE2_NATIVE */
+#define HAVE_SSE2_INTRIN (HAVE_SSE2_NATIVE || HAVE_TARGET_INTRINSICS)
+
+/* PCLMUL: with MSVC, __AVX2__ is the macro tested to treat PCLMUL as native */
+#if defined(__PCLMUL__) || (defined(_MSC_VER) && defined(__AVX2__))
+# define HAVE_PCLMUL_NATIVE 1
+#else
+# define HAVE_PCLMUL_NATIVE 0
+#endif /* HAVE_PCLMUL_NATIVE */
+#if HAVE_PCLMUL_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 4) || CLANG_PREREQ(3, 2, 0) || \
+ defined(_MSC_VER)))
+# define HAVE_PCLMUL_INTRIN 1
+#else
+# define HAVE_PCLMUL_INTRIN 0
+#endif /* HAVE_PCLMUL_INTRIN */
+
+/* AVX: native when the compiler defines __AVX__ */
+#ifdef __AVX__
+# define HAVE_AVX_NATIVE 1
+#else
+# define HAVE_AVX_NATIVE 0
+#endif /* HAVE_AVX_NATIVE */
+#if HAVE_AVX_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 6) || CLANG_PREREQ(3, 0, 0) || \
+ defined(_MSC_VER)))
+# define HAVE_AVX_INTRIN 1
+#else
+# define HAVE_AVX_INTRIN 0
+#endif /* HAVE_AVX_INTRIN */
+
+/* AVX2: native when the compiler defines __AVX2__ */
+#ifdef __AVX2__
+# define HAVE_AVX2_NATIVE 1
+#else
+# define HAVE_AVX2_NATIVE 0
+#endif /* HAVE_AVX2_NATIVE */
+#if HAVE_AVX2_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 7) || CLANG_PREREQ(3, 1, 0) || \
+ defined(_MSC_VER)))
+# define HAVE_AVX2_INTRIN 1
+#else
+# define HAVE_AVX2_INTRIN 0
+#endif /* HAVE_AVX2_INTRIN */
+
+/* BMI2: with MSVC, __AVX2__ is the macro tested to treat BMI2 as native */
+#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
+# define HAVE_BMI2_NATIVE 1
+#else
+# define HAVE_BMI2_NATIVE 0
+#endif /* HAVE_BMI2_NATIVE */
+#if HAVE_BMI2_NATIVE || (HAVE_TARGET_INTRINSICS && \
+ (GCC_PREREQ(4, 7) || CLANG_PREREQ(3, 1, 0) || \
+ defined(_MSC_VER)))
+# define HAVE_BMI2_INTRIN 1
+#else
+# define HAVE_BMI2_INTRIN 0
+#endif /* HAVE_BMI2_INTRIN */
+
+#endif /* ARCH_X86_32 || ARCH_X86_64 */
+
+#endif /* LIB_X86_CPU_FEATURES_H */
+
+
+/*
+ * If the expression passed to SAFETY_CHECK() evaluates to false, then the
+ * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the
+ * compressed data is invalid.
+ *
+ * Theoretically, these checks could be disabled for specialized applications
+ * where all input to the decompressor will be trusted.
+ */
+#if 0
+# pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
+# define SAFETY_CHECK(expr) (void)(expr)
+#else
+# define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA
+#endif
+
+/*****************************************************************************
+ * Input bitstream *
+ *****************************************************************************/
+
+/*
+ * The state of the "input bitstream" consists of the following variables:
+ *
+ * - in_next: a pointer to the next unread byte in the input buffer
+ *
+ * - in_end: a pointer to just past the end of the input buffer
+ *
+ * - bitbuf: a word-sized variable containing bits that have been read from
+ * the input buffer or from the implicit appended zero bytes
+ *
+ * - bitsleft: the number of bits in 'bitbuf' available to be consumed.
+ * After REFILL_BITS_BRANCHLESS(), 'bitbuf' can actually
+ * contain more bits than this. However, only the bits counted
+ * by 'bitsleft' can actually be consumed; the rest can only be
+ * used for preloading.
+ *
+ * As a micro-optimization, we allow bits 8 and higher of
+ * 'bitsleft' to contain garbage. When consuming the bits
+ * associated with a decode table entry, this allows us to do
+ * 'bitsleft -= entry' instead of 'bitsleft -= (u8)entry'.
+ * On some CPUs, this helps reduce instruction dependencies.
+ * This does have the disadvantage that 'bitsleft' sometimes
+ * needs to be cast to 'u8', such as when it's used as a shift
+ * amount in REFILL_BITS_BRANCHLESS(). But that one happens
+ * for free since most CPUs ignore high bits in shift amounts.
+ *
+ * - overread_count: the total number of implicit appended zero bytes that
+ * have been loaded into the bitbuffer, including any
+ * counted by 'bitsleft' and any already consumed
+ */
+
+/*
+ * The type for the bitbuffer variable ('bitbuf' described above). For best
+ * performance, this should have size equal to a machine word.
+ *
+ * 64-bit platforms have a significant advantage: they get a bigger bitbuffer
+ * which they don't have to refill as often.
+ */
+typedef machine_word_t bitbuf_t;
+#define BITBUF_NBITS (8 * (int)sizeof(bitbuf_t)) /* width of bitbuf_t in bits, as an int */
+
+/* BITMASK(n) returns a bitmask of the low 'n' bits; 'n' must be less than BITBUF_NBITS. */
+#define BITMASK(n) (((bitbuf_t)1 << (n)) - 1)
+
+/*
+ * MAX_BITSLEFT is the maximum number of consumable bits, i.e. the maximum value
+ * of '(u8)bitsleft'. This is the size of the bitbuffer variable, minus 1 if
+ * the branchless refill method is being used (see REFILL_BITS_BRANCHLESS()).
+ */
+#define MAX_BITSLEFT \
+	(BITBUF_NBITS - (UNALIGNED_ACCESS_IS_FAST ? 1 : 0))
+
+/*
+ * CONSUMABLE_NBITS is the minimum number of bits that are guaranteed to be
+ * consumable (counted in 'bitsleft') immediately after refilling the bitbuffer.
+ * Since only whole bytes can be added to 'bitsleft', the worst case is
+ * 'MAX_BITSLEFT - 7': the smallest amount where another byte doesn't fit.
+ */
+#define CONSUMABLE_NBITS (MAX_BITSLEFT - 7) /* 7: the largest gap that can't take one more whole byte */
+
+/*
+ * FASTLOOP_PRELOADABLE_NBITS is the minimum number of bits that are guaranteed
+ * to be preloadable immediately after REFILL_BITS_IN_FASTLOOP(). (It is *not*
+ * guaranteed after REFILL_BITS(), since REFILL_BITS() falls back to a
+ * byte-at-a-time refill method near the end of input.) This may exceed the
+ * number of consumable bits (counted by 'bitsleft'). Any bits not counted in
+ * 'bitsleft' can only be used for precomputation and cannot be consumed.
+ */
+#define FASTLOOP_PRELOADABLE_NBITS \
+ (UNALIGNED_ACCESS_IS_FAST ? BITBUF_NBITS : CONSUMABLE_NBITS) /* whole word only with the branchless refill */
+
+/*
+ * PRELOAD_SLACK is the minimum number of bits that are guaranteed to be
+ * preloadable but not consumable, following REFILL_BITS_IN_FASTLOOP() and any
+ * subsequent consumptions. This is 1 bit if the branchless refill method is
+ * being used, and 0 bits otherwise.
+ */
+#define PRELOAD_SLACK MAX(0, FASTLOOP_PRELOADABLE_NBITS - MAX_BITSLEFT) /* MAX() clamps the byte-wise case to 0 */
+
+/*
+ * CAN_CONSUME(n) is true if it's guaranteed that if the bitbuffer has just been
+ * refilled, then it's always possible to consume 'n' bits from it. 'n' should
+ * be a compile-time constant, to enable compile-time evaluation.
+ */
+#define CAN_CONSUME(n)	((n) <= CONSUMABLE_NBITS)
+
+/*
+ * CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) is true if it's
+ * guaranteed that after REFILL_BITS_IN_FASTLOOP(), it's always possible to
+ * consume 'consume_nbits' bits, then preload 'preload_nbits' bits. The
+ * arguments should be compile-time constants to enable compile-time evaluation.
+ */
+#define CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) \
+	((consume_nbits) <= CONSUMABLE_NBITS && \
+	 (consume_nbits) + (preload_nbits) <= FASTLOOP_PRELOADABLE_NBITS)
+
+/*
+ * REFILL_BITS_BRANCHLESS() branchlessly refills the bitbuffer variable by
+ * reading the next word from the input buffer and updating 'in_next' and
+ * 'bitsleft' based on how many bits were refilled -- counting whole bytes only.
+ * This is much faster than reading a byte at a time, at least if the CPU is
+ * little endian and supports fast unaligned memory accesses.
+ *
+ * The simplest way of branchlessly updating 'bitsleft' would be:
+ *
+ * bitsleft += (MAX_BITSLEFT - bitsleft) & ~7;
+ *
+ * To make it faster, we define MAX_BITSLEFT to be 'WORDBITS - 1' rather than
+ * WORDBITS, so that in binary it looks like 111111 or 11111. Then, we update
+ * 'bitsleft' by just setting the bits above the low 3 bits:
+ *
+ * bitsleft |= MAX_BITSLEFT & ~7;
+ *
+ * That compiles down to a single instruction like 'or $0x38, %rbp'. Using
+ * 'MAX_BITSLEFT == WORDBITS - 1' also has the advantage that refills can be
+ * done when 'bitsleft == MAX_BITSLEFT' without invoking undefined behavior.
+ *
+ * The simplest way of branchlessly updating 'in_next' would be:
+ *
+ * in_next += (MAX_BITSLEFT - bitsleft) >> 3;
+ *
+ * With 'MAX_BITSLEFT == WORDBITS - 1' we could use an XOR instead, though this
+ * isn't really better:
+ *
+ * in_next += (MAX_BITSLEFT ^ bitsleft) >> 3;
+ *
+ * An alternative which can be marginally better is the following:
+ *
+ * in_next += sizeof(bitbuf_t) - 1;
+ * in_next -= (bitsleft >> 3) & 0x7;
+ *
+ * It seems this would increase the number of CPU instructions from 3 (sub, shr,
+ * add) to 4 (add, shr, and, sub). However, if the CPU has a bitfield
+ * extraction instruction (e.g. arm's ubfx), it stays at 3, and is potentially
+ * more efficient because the length of the longest dependency chain decreases
+ * from 3 to 2. This alternative also has the advantage that it ignores the
+ * high bits in 'bitsleft', so it is compatible with the micro-optimization we
+ * use where we let the high bits of 'bitsleft' contain garbage.
+ */
+#define REFILL_BITS_BRANCHLESS() \
+do { \
+ bitbuf |= get_unaligned_leword(in_next) << (u8)bitsleft; /* (u8) drops garbage high bits */ \
+ in_next += sizeof(bitbuf_t) - 1; /* these two lines advance by the whole bytes added */ \
+ in_next -= (bitsleft >> 3) & 0x7; \
+ bitsleft |= MAX_BITSLEFT & ~7; /* set every bit above the low 3 */ \
+} while (0)
+
+/*
+ * REFILL_BITS() loads bits from the input buffer until the bitbuffer variable
+ * contains at least CONSUMABLE_NBITS consumable bits.
+ *
+ * This checks for the end of input, and it doesn't guarantee
+ * FASTLOOP_PRELOADABLE_NBITS, so it can't be used in the fastloop.
+ *
+ * If we would overread the input buffer, we just don't read anything, leaving
+ * the bits zeroed but marking them filled. This simplifies the decompressor
+ * because it removes the need to always be able to distinguish between real
+ * overreads and overreads caused only by the decompressor's own lookahead.
+ *
+ * We do still keep track of the number of bytes that have been overread, for
+ * two reasons. First, it allows us to determine the exact number of bytes that
+ * were consumed once the stream ends or an uncompressed block is reached.
+ * Second, it allows us to stop early if the overread amount gets so large (more
+ * than sizeof bitbuf) that it can only be caused by a real overread. (The
+ * second part is arguably unneeded, since libdeflate is buffer-based; given
+ * infinite zeroes, it will eventually either completely fill the output buffer
+ * or return an error. However, we do it to be slightly more friendly to the
+ * not-recommended use case of decompressing with an unknown output size.)
+ */
+#define REFILL_BITS() \
+do { \
+ if (UNALIGNED_ACCESS_IS_FAST && \
+ likely(in_end - in_next >= sizeof(bitbuf_t))) { /* a whole word is still readable */ \
+ REFILL_BITS_BRANCHLESS(); \
+ } else { /* byte-at-a-time refill */ \
+ while ((u8)bitsleft < CONSUMABLE_NBITS) { \
+ if (likely(in_next != in_end)) { \
+ bitbuf |= (bitbuf_t)*in_next++ << \
+ (u8)bitsleft; \
+ } else { \
+ overread_count++; /* one more implicit zero byte */ \
+ SAFETY_CHECK(overread_count <= \
+ sizeof(bitbuf_t)); \
+ } \
+ bitsleft += 8; /* counted even when the byte was implicit */ \
+ } \
+ } \
+} while (0)
+
+/*
+ * REFILL_BITS_IN_FASTLOOP() is like REFILL_BITS(), but it doesn't check for the
+ * end of the input. It can only be used in the fastloop.
+ */
+#define REFILL_BITS_IN_FASTLOOP() \
+do { \
+ STATIC_ASSERT(UNALIGNED_ACCESS_IS_FAST || \
+ FASTLOOP_PRELOADABLE_NBITS == CONSUMABLE_NBITS); \
+ if (UNALIGNED_ACCESS_IS_FAST) { \
+ REFILL_BITS_BRANCHLESS(); \
+ } else { /* byte-at-a-time refill, with no end-of-input check */ \
+ while ((u8)bitsleft < CONSUMABLE_NBITS) { \
+ bitbuf |= (bitbuf_t)*in_next++ << (u8)bitsleft; \
+ bitsleft += 8; \
+ } \
+ } \
+} while (0)
+
+/*
+ * This is the worst-case maximum number of output bytes that are written to
+ * during each iteration of the fastloop. The worst case is 2 literals, then a
+ * match of length DEFLATE_MAX_MATCH_LEN. Additionally, some slack space must
+ * be included for the intentional overrun in the match copy implementation.
+ */
+#define FASTLOOP_MAX_BYTES_WRITTEN \
+ (2 + DEFLATE_MAX_MATCH_LEN + (5 * WORDBYTES) - 1) /* 2 literals + longest match + copy overrun slack */
+
+/*
+ * This is the worst-case maximum number of input bytes that are read during
+ * each iteration of the fastloop. To get this value, we first compute the
+ * greatest number of bits that can be refilled during a loop iteration. The
+ * refill at the beginning can add at most MAX_BITSLEFT, and the amount that can
+ * be refilled later is no more than the maximum amount that can be consumed by
+ * 2 literals that don't need a subtable, then a match. We convert this value
+ * to bytes, rounding up; this gives the maximum number of bytes that 'in_next'
+ * can be advanced. Finally, we add sizeof(bitbuf_t) to account for
+ * REFILL_BITS_BRANCHLESS() reading a word past 'in_next'.
+ */
+#define FASTLOOP_MAX_BYTES_READ \
+ (DIV_ROUND_UP(MAX_BITSLEFT + (2 * LITLEN_TABLEBITS) + \
+ LENGTH_MAXBITS + OFFSET_MAXBITS, 8) + \
+ sizeof(bitbuf_t)) /* + the word the branchless refill reads at 'in_next' */
+
+/*****************************************************************************
+ * Huffman decoding *
+ *****************************************************************************/
+
+/*
+ * The fastest way to decode Huffman-encoded data is basically to use a decode
+ * table that maps the next TABLEBITS bits of data to their symbol. Each entry
+ * decode_table[i] maps to the symbol whose codeword is a prefix of 'i'. A
+ * symbol with codeword length 'n' has '2**(TABLEBITS-n)' entries in the table.
+ *
+ * Ideally, TABLEBITS and the maximum codeword length would be the same; some
+ * compression formats are designed with this goal in mind. Unfortunately, in
+ * DEFLATE, the maximum litlen and offset codeword lengths are 15 bits, which is
+ * too large for a practical TABLEBITS. It's not *that* much larger, though, so
+ * the workaround is to use a single level of subtables. In the main table,
+ * entries for prefixes of codewords longer than TABLEBITS contain a "pointer"
+ * to the appropriate subtable along with the number of bits it is indexed with.
+ *
+ * The most efficient way to allocate subtables is to allocate them dynamically
+ * after the main table. The worst-case number of table entries needed,
+ * including subtables, is precomputable; see the ENOUGH constants below.
+ *
+ * A useful optimization is to store the codeword lengths in the decode table so
+ * that they don't have to be looked up by indexing a separate table that maps
+ * symbols to their codeword lengths. We basically do this; however, for the
+ * litlen and offset codes we also implement some DEFLATE-specific optimizations
+ * that build in the consideration of the "extra bits" and the
+ * literal/length/end-of-block division. For the exact decode table entry
+ * format we use, see the definitions of the *_decode_results[] arrays below.
+ */
+
+
+/*
+ * These are the TABLEBITS values we use for each of the DEFLATE Huffman codes,
+ * along with their corresponding ENOUGH values.
+ *
+ * For the precode, we use PRECODE_TABLEBITS == 7 since this is the maximum
+ * precode codeword length. This avoids ever needing subtables.
+ *
+ * For the litlen and offset codes, we cannot realistically avoid ever needing
+ * subtables, since litlen and offset codewords can be up to 15 bits. A higher
+ * TABLEBITS reduces the number of lookups that need a subtable, which increases
+ * performance; however, it increases memory usage and makes building the table
+ * take longer, which decreases performance. We choose values that work well in
+ * practice, making subtables rarely needed without making the tables too large.
+ *
+ * Our choice of OFFSET_TABLEBITS == 8 is a bit low; without any special
+ * considerations, 9 would fit the trade-off curve better. However, there is a
+ * performance benefit to using exactly 8 bits when it is a compile-time
+ * constant, as many CPUs can take the low byte more easily than the low 9 bits.
+ *
+ * zlib treats its equivalents of TABLEBITS as maximum values; whenever it
+ * builds a table, it caps the actual table_bits to the longest codeword. This
+ * makes sense in theory, as there's no need for the table to be any larger than
+ * needed to support the longest codeword. However, having the table bits be a
+ * compile-time constant is beneficial to the performance of the decode loop, so
+ * there is a trade-off. libdeflate currently uses the dynamic table_bits
+ * strategy for the litlen table only, due to its larger maximum size.
+ * PRECODE_TABLEBITS and OFFSET_TABLEBITS are smaller, so going dynamic there
+ * isn't as useful, and OFFSET_TABLEBITS=8 is useful as mentioned above.
+ *
+ * Each TABLEBITS value has a corresponding ENOUGH value that gives the
+ * worst-case maximum number of decode table entries, including the main table
+ * and all subtables. The ENOUGH value depends on three parameters:
+ *
+ * (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMS)
+ * (2) the maximum number of main table bits (*_TABLEBITS)
+ * (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN)
+ *
+ * The ENOUGH values were computed using the utility program 'enough' from zlib.
+ */
+#define PRECODE_TABLEBITS 7 /* the maximum precode codeword length */
+#define PRECODE_ENOUGH 128 /* enough 19 7 7 */
+#define LITLEN_TABLEBITS 11 /* a maximum; the litlen table_bits is dynamic */
+#define LITLEN_ENOUGH 2342 /* enough 288 11 15 */
+#define OFFSET_TABLEBITS 8 /* exactly one byte, on purpose */
+#define OFFSET_ENOUGH 402 /* enough 32 8 15 */
+
+/*
+ * Compose the final decode table entry for symbol 'sym': the static part
+ * 'decode_results[sym]' plus the dynamic part 'len', the remaining codeword
+ * length (the whole codeword length for main table entries, or that length
+ * minus TABLEBITS for subtable entries).  'len' goes into each of the two
+ * low-order bytes; see the *_decode_results[] arrays below for the formats.
+ */
+static forceinline u32
+make_decode_table_entry(const u32 decode_results[], u32 sym, u32 len)
+{
+	u32 entry = decode_results[sym];	/* static part */
+
+	entry += (len << 8) + len;	/* dynamic part, in both low bytes */
+	return entry;
+}
+
+/*
+ * Format of the precode decode table entries.  Any bit that is not listed
+ * below is zero:
+ *
+ * Bit 20-16: presym
+ * Bit 10-8: codeword length [not used]
+ * Bit 2-0: codeword length
+ *
+ * Because PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN, the precode
+ * decode table never needs subtables.
+ *
+ * precode_decode_results[] holds the static part of the entry for each
+ * symbol; make_decode_table_entry() adds the length to form the final entry.
+ */
+static const u32 precode_decode_results[] = {
+#define ENTRY(presym) ((u32)(presym) << 16)
+	ENTRY(0),  ENTRY(1),  ENTRY(2),  ENTRY(3),
+	ENTRY(4),  ENTRY(5),  ENTRY(6),  ENTRY(7),
+	ENTRY(8),  ENTRY(9),  ENTRY(10), ENTRY(11),
+	ENTRY(12), ENTRY(13), ENTRY(14), ENTRY(15),	/* 0-15: explicit lengths */
+	ENTRY(16), ENTRY(17), ENTRY(18),		/* 16-18: run-length codes */
+#undef ENTRY
+};
+
+/* Litlen and offset decode table entry flags (bit 31 and bits 15-13) */
+
+/* Bit 31: indicates a literal entry in the litlen decode table */
+#define HUFFDEC_LITERAL 0x80000000
+
+/* Bit 15: set when HUFFDEC_SUBTABLE_POINTER or HUFFDEC_END_OF_BLOCK is set */
+#define HUFFDEC_EXCEPTIONAL 0x00008000
+
+/* Bit 14: indicates a subtable pointer entry (litlen or offset decode table) */
+#define HUFFDEC_SUBTABLE_POINTER 0x00004000
+
+/* Bit 13: indicates an end-of-block entry in the litlen decode table */
+#define HUFFDEC_END_OF_BLOCK 0x00002000
+
+/* Max bits consumed by decoding a match length (FAST: no subtable lookup) */
+#define LENGTH_MAXBITS (DEFLATE_MAX_LITLEN_CODEWORD_LEN + \
+ DEFLATE_MAX_EXTRA_LENGTH_BITS)
+#define LENGTH_MAXFASTBITS (LITLEN_TABLEBITS /* no subtable needed */ + \
+ DEFLATE_MAX_EXTRA_LENGTH_BITS)
+
+/*
+ * Here is the format of our litlen decode table entries. Bits not explicitly
+ * described contain zeroes:
+ *
+ * Literals:
+ * Bit 31: 1 (HUFFDEC_LITERAL)
+ * Bit 23-16: literal value
+ * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: remaining codeword length [not used]
+ * Bit 3-0: remaining codeword length
+ * Lengths:
+ * Bit 31: 0 (!HUFFDEC_LITERAL)
+ * Bit 24-16: length base value
+ * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: remaining codeword length
+ * Bit 4-0: remaining codeword length + number of extra bits
+ * End of block:
+ * Bit 31: 0 (!HUFFDEC_LITERAL)
+ * Bit 15: 1 (HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 1 (HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: remaining codeword length [not used]
+ * Bit 3-0: remaining codeword length
+ * Subtable pointer:
+ * Bit 31: 0 (!HUFFDEC_LITERAL)
+ * Bit 30-16: index of start of subtable
+ * Bit 15: 1 (HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER)
+ * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK)
+ * Bit 11-8: number of subtable bits
+ * Bit 3-0: number of main table bits
+ *
+ * This format has several desirable properties:
+ *
+ * - The codeword length, length slot base, and number of extra length bits
+ * are all built in. This eliminates the need to separately look up this
+ * information by indexing separate arrays by symbol or length slot.
+ *
+ * - The HUFFDEC_* flags enable easily distinguishing between the different
+ * types of entries. The HUFFDEC_LITERAL flag enables a fast path for
+ * literals; the high bit is used for this, as some CPUs can test the
+ * high bit more easily than other bits. The HUFFDEC_EXCEPTIONAL flag
+ * makes it possible to detect the two unlikely cases (subtable pointer
+ * and end of block) in a single bit flag test.
+ *
+ * - The low byte is the number of bits that need to be removed from the
+ * bitstream; this makes this value easily accessible, and it enables the
+ * micro-optimization of doing 'bitsleft -= entry' instead of
+ * 'bitsleft -= (u8)entry'. It also includes the number of extra bits,
+ * so they don't need to be removed separately.
+ *
+ * - The flags in bits 15-13 are arranged to be 0 when the
+ * "remaining codeword length" in bits 11-8 is needed, making this value
+ * fairly easily accessible as well via a shift and downcast.
+ *
+ * - Similarly, bits 13-12 are 0 when the "subtable bits" in bits 11-8 are
+ * needed, making it possible to extract this value with '& 0x3F' rather
+ * than '& 0xF'. This value is only used as a shift amount, so this can
+ * save an 'and' instruction as the masking by 0x3F happens implicitly.
+ *
+ * litlen_decode_results[] contains the static part of the entry for each
+ * symbol. make_decode_table_entry() produces the final entries.
+ */
+static const u32 litlen_decode_results[] = {
+
+ /* Literals (symbols 0-255) */
+#define ENTRY(literal) (HUFFDEC_LITERAL | ((u32)literal << 16))
+ ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
+ ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
+ ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
+ ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
+ ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) ,
+ ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) ,
+ ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) ,
+ ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) ,
+ ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) ,
+ ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) ,
+ ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) ,
+ ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) ,
+ ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) ,
+ ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) ,
+ ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) ,
+ ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) ,
+ ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) ,
+ ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) ,
+ ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) ,
+ ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) ,
+ ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) ,
+ ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) ,
+ ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) ,
+ ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) ,
+ ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) ,
+ ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
+ ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
+ ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
+ ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
+ ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
+ ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
+ ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
+ ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
+ ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
+ ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
+ ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
+ ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
+ ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
+ ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
+ ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
+ ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
+ ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
+ ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
+ ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
+ ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
+ ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
+ ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
+ ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
+ ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
+ ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
+ ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
+ ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
+ ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
+ ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
+ ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
+ ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
+ ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
+ ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
+ ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
+ ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
+ ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
+ ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
+ ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
+ ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
+#undef ENTRY
+
+ /* End of block (symbol 256) */
+ HUFFDEC_EXCEPTIONAL | HUFFDEC_END_OF_BLOCK,
+
+ /* Lengths (symbols 257-287); the last two repeat symbol 285's entry */
+#define ENTRY(length_base, num_extra_bits) \
+ (((u32)(length_base) << 16) | (num_extra_bits))
+ ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0),
+ ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0),
+ ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
+ ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
+ ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
+ ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
+ ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
+ ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
+#undef ENTRY
+};
+
+/* Max bits consumed by decoding a match offset (FAST: no subtable lookup) */
+#define OFFSET_MAXBITS (DEFLATE_MAX_OFFSET_CODEWORD_LEN + \
+ DEFLATE_MAX_EXTRA_OFFSET_BITS)
+#define OFFSET_MAXFASTBITS (OFFSET_TABLEBITS /* no subtable needed */ + \
+ DEFLATE_MAX_EXTRA_OFFSET_BITS)
+
+/*
+ * Here is the format of our offset decode table entries. Bits not explicitly
+ * described contain zeroes:
+ *
+ * Offsets:
+ * Bit 31-16: offset base value
+ * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 11-8: remaining codeword length
+ * Bit 4-0: remaining codeword length + number of extra bits
+ * Subtable pointer:
+ * Bit 31-16: index of start of subtable
+ * Bit 15: 1 (HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER)
+ * Bit 11-8: number of subtable bits
+ * Bit 3-0: number of main table bits
+ *
+ * These work the same way as the length entries and subtable pointer entries in
+ * the litlen decode table; see litlen_decode_results[] above.
+ */
+static const u32 offset_decode_results[] = { /* indexed by offset symbol 0-31 */
+#define ENTRY(offset_base, num_extra_bits) \
+ (((u32)(offset_base) << 16) | (num_extra_bits))
+ ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) ,
+ ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) ,
+ ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) ,
+ ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) ,
+ ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) ,
+ ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) ,
+ ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) ,
+ ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) ,
+#undef ENTRY /* note: symbols 30 and 31 repeat symbol 29's entry */
+};
+
+/*
+ * The main DEFLATE decompressor structure. Since libdeflate only supports
+ * full-buffer decompression, this structure doesn't store the entire
+ * decompression state, most of which is in stack variables. Instead, this
+ * struct just contains the decode tables and some temporary arrays used for
+ * building them, as these are too large to comfortably allocate on the stack.
+ *
+ * Storing the decode tables in the decompressor struct also allows the decode
+ * tables for the static codes to be reused whenever two static Huffman blocks
+ * are decoded without an intervening dynamic block, even across streams.
+ */
+struct libdeflate_decompressor {
+
+ /*
+ * The arrays aren't all needed at the same time. 'precode_lens' and
+ * 'precode_decode_table' are unneeded after 'lens' has been filled.
+ * Furthermore, 'lens' need not be retained after building the litlen
+ * and offset decode tables. In fact, 'lens' can be in union with
+ * 'litlen_decode_table' provided that 'offset_decode_table' is separate
+ * and is built first.
+ */
+
+ union {
+ u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS]; /* precode codeword lengths */
+
+ struct {
+ u8 lens[DEFLATE_NUM_LITLEN_SYMS + /* litlen lengths first, ... */
+ DEFLATE_NUM_OFFSET_SYMS + /* ... then the offset lengths, ... */
+ DEFLATE_MAX_LENS_OVERRUN]; /* ... then slack for run overrun */
+
+ u32 precode_decode_table[PRECODE_ENOUGH];
+ } l;
+
+ u32 litlen_decode_table[LITLEN_ENOUGH];
+ } u;
+
+ u32 offset_decode_table[OFFSET_ENOUGH];
+
+ /* used only during build_decode_table() */
+ u16 sorted_syms[DEFLATE_MAX_NUM_SYMS];
+
+ bool static_codes_loaded; /* set to false when a dynamic block is read */
+ unsigned litlen_tablebits; /* table_bits actually used for the litlen table */
+
+ /* The free() function for this struct, chosen at allocation time */
+ free_func_t free_func;
+};
+
+/*
+ * Build a table for fast decoding of symbols from a Huffman code. As input,
+ * this function takes the codeword length of each symbol which may be used in
+ * the code. As output, it produces a decode table for the canonical Huffman
+ * code described by the codeword lengths. The decode table is built with the
+ * assumption that it will be indexed with "bit-reversed" codewords, where the
+ * low-order bit is the first bit of the codeword. This format is used for all
+ * Huffman codes in DEFLATE.
+ *
+ * @decode_table
+ * The array in which the decode table will be generated. This array must
+ * have sufficient length; see the definition of the ENOUGH numbers.
+ * @lens
+ * An array which provides, for each symbol, the length of the
+ * corresponding codeword in bits, or 0 if the symbol is unused. This may
+ * alias @decode_table, since nothing is written to @decode_table until all
+ * @lens have been consumed. All codeword lengths are assumed to be <=
+ * @max_codeword_len but are otherwise considered untrusted. If they do
+ * not form a valid Huffman code, then the decode table is not built and
+ * %false is returned.
+ * @num_syms
+ * The number of symbols in the code, including all unused symbols.
+ * @decode_results
+ * An array which gives the incomplete decode result for each symbol. The
+ * needed values in this array will be combined with codeword lengths to
+ * make the final decode table entries using make_decode_table_entry().
+ * @table_bits
+ * The log base-2 of the number of main table entries to use.
+ * If @table_bits_ret != NULL, then @table_bits is treated as a maximum
+ * value and it will be decreased if a smaller table would be sufficient.
+ * @max_codeword_len
+ * The maximum allowed codeword length for this Huffman code.
+ * Must be <= DEFLATE_MAX_CODEWORD_LEN.
+ * @sorted_syms
+ * A temporary array of length @num_syms.
+ * @table_bits_ret
+ * If non-NULL, then the dynamic table_bits is enabled, and the actual
+ * table_bits value will be returned here.
+ *
+ * Returns %true if successful; %false if the codeword lengths do not form a
+ * valid Huffman code.
+ */
+static bool
+build_decode_table(u32 decode_table[],
+ const u8 lens[],
+ const unsigned num_syms,
+ const u32 decode_results[],
+ unsigned table_bits,
+ unsigned max_codeword_len,
+ u16 *sorted_syms,
+ unsigned *table_bits_ret)
+{
+ unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1]; /* num symbols per length */
+ unsigned offsets[DEFLATE_MAX_CODEWORD_LEN + 1]; /* next sorted_syms[] slot per length */
+ unsigned sym; /* current symbol */
+ unsigned codeword; /* current codeword, bit-reversed */
+ unsigned len; /* current codeword length in bits */
+ unsigned count; /* num codewords remaining with this length */
+ u32 codespace_used; /* codespace used out of '2^max_codeword_len' */
+ unsigned cur_table_end; /* end index of current table */
+ unsigned subtable_prefix; /* codeword prefix of current subtable */
+ unsigned subtable_start; /* start index of current subtable */
+ unsigned subtable_bits; /* log2 of current subtable length */
+
+ /* Count how many codewords have each length, including 0. */
+ for (len = 0; len <= max_codeword_len; len++)
+ len_counts[len] = 0;
+ for (sym = 0; sym < num_syms; sym++)
+ len_counts[lens[sym]]++;
+
+ /*
+ * Determine the actual maximum codeword length that was used, and
+ * decrease table_bits to it if allowed.
+ */
+ while (max_codeword_len > 1 && len_counts[max_codeword_len] == 0)
+ max_codeword_len--;
+ if (table_bits_ret != NULL) {
+ table_bits = MIN(table_bits, max_codeword_len);
+ *table_bits_ret = table_bits;
+ }
+
+ /*
+ * Sort the symbols primarily by increasing codeword length and
+ * secondarily by increasing symbol value; or equivalently by their
+ * codewords in lexicographic order, since a canonical code is assumed.
+ *
+ * For efficiency, also compute 'codespace_used' in the same pass over
+ * 'len_counts[]' used to build 'offsets[]' for sorting.
+ */
+
+ /* Ensure that 'codespace_used' cannot overflow. */
+ STATIC_ASSERT(sizeof(codespace_used) == 4);
+ STATIC_ASSERT(UINT32_MAX / (1U << (DEFLATE_MAX_CODEWORD_LEN - 1)) >=
+ DEFLATE_MAX_NUM_SYMS);
+
+ offsets[0] = 0;
+ offsets[1] = len_counts[0];
+ codespace_used = 0;
+ for (len = 1; len < max_codeword_len; len++) {
+ offsets[len + 1] = offsets[len] + len_counts[len];
+ codespace_used = (codespace_used << 1) + len_counts[len];
+ }
+ codespace_used = (codespace_used << 1) + len_counts[len]; /* len == max_codeword_len */
+
+ for (sym = 0; sym < num_syms; sym++)
+ sorted_syms[offsets[lens[sym]]++] = sym; /* ascending sym within each length */
+
+ sorted_syms += offsets[0]; /* Skip unused symbols */
+
+ /* lens[] is done being used, so we can write to decode_table[] now. */
+
+ /*
+ * Check whether the lengths form a complete code (exactly fills the
+ * codespace), an incomplete code (doesn't fill the codespace), or an
+ * overfull code (overflows the codespace). A codeword of length 'n'
+ * uses proportion '1/(2^n)' of the codespace. An overfull code is
+ * nonsensical, so is considered invalid. An incomplete code is
+ * considered valid only in two specific cases; see below.
+ */
+
+ /* overfull code? */
+ if (unlikely(codespace_used > (1U << max_codeword_len)))
+ return false;
+
+ /* incomplete code? */
+ if (unlikely(codespace_used < (1U << max_codeword_len))) {
+ u32 entry;
+ unsigned i;
+
+ if (codespace_used == 0) {
+ /*
+ * An empty code is allowed. This can happen for the
+ * offset code in DEFLATE, since a dynamic Huffman block
+ * need not contain any matches.
+ */
+
+ /* sym=0, len=1 (arbitrary) */
+ entry = make_decode_table_entry(decode_results, 0, 1);
+ } else {
+ /*
+ * Allow codes with a single used symbol, with codeword
+ * length 1. The DEFLATE RFC is unclear regarding this
+ * case. What zlib's decompressor does is permit this
+ * for the litlen and offset codes and assume the
+ * codeword is '0' rather than '1'. We do the same
+ * except we allow this for precodes too, since there's
+ * no convincing reason to treat the codes differently.
+ * We also assign both codewords '0' and '1' to the
+ * symbol to avoid having to handle '1' specially.
+ */
+ if (codespace_used != (1U << (max_codeword_len - 1)) ||
+ len_counts[1] != 1)
+ return false;
+ entry = make_decode_table_entry(decode_results,
+ *sorted_syms, 1);
+ }
+ /*
+ * Note: the decode table still must be fully initialized, in
+ * case the stream is malformed and contains bits from the part
+ * of the codespace the incomplete code doesn't use.
+ */
+ for (i = 0; i < (1U << table_bits); i++)
+ decode_table[i] = entry;
+ return true;
+ }
+
+ /*
+ * The lengths form a complete code. Now, enumerate the codewords in
+ * lexicographic order and fill the decode table entries for each one.
+ *
+ * First, process all codewords with len <= table_bits. Each one gets
+ * '2^(table_bits-len)' direct entries in the table.
+ *
+ * Since DEFLATE uses bit-reversed codewords, these entries aren't
+ * consecutive but rather are spaced '2^len' entries apart. This makes
+ * filling them naively somewhat awkward and inefficient, since strided
+ * stores are less cache-friendly and preclude the use of word or
+ * vector-at-a-time stores to fill multiple entries per instruction.
+ *
+ * To optimize this, we incrementally double the table size. When
+ * processing codewords with length 'len', the table is treated as
+ * having only '2^len' entries, so each codeword uses just one entry.
+ * Then, each time 'len' is incremented, the table size is doubled and
+ * the first half is copied to the second half. This significantly
+ * improves performance over naively doing strided stores.
+ *
+ * Note that some entries copied for each table doubling may not have
+ * been initialized yet, but it doesn't matter since they're guaranteed
+ * to be initialized later (because the Huffman code is complete).
+ */
+ codeword = 0;
+ len = 1;
+ while ((count = len_counts[len]) == 0)
+ len++;
+ cur_table_end = 1U << len;
+ while (len <= table_bits) {
+ /* Process all 'count' codewords with length 'len' bits. */
+ do {
+ unsigned bit;
+
+ /* Fill the first entry for the current codeword. */
+ decode_table[codeword] =
+ make_decode_table_entry(decode_results,
+ *sorted_syms++, len);
+
+ if (codeword == cur_table_end - 1) {
+ /* Last codeword (all 1's) */
+ for (; len < table_bits; len++) {
+ memcpy(&decode_table[cur_table_end],
+ decode_table,
+ cur_table_end *
+ sizeof(decode_table[0]));
+ cur_table_end <<= 1;
+ }
+ return true;
+ }
+ /*
+ * To advance to the lexicographically next codeword in
+ * the canonical code, the codeword must be incremented,
+ * then 0's must be appended to the codeword as needed
+ * to match the next codeword's length.
+ *
+ * Since the codeword is bit-reversed, appending 0's is
+ * a no-op. However, incrementing it is nontrivial. To
+ * do so efficiently, use the 'bsr' instruction to find
+ * the last (highest order) 0 bit in the codeword, set
+ * it, and clear any later (higher order) 1 bits. But
+ * 'bsr' actually finds the highest order 1 bit, so to
+ * use it first flip all bits in the codeword by XOR'ing
+ * it with (1U << len) - 1 == cur_table_end - 1.
+ */
+ bit = 1U << bsr32(codeword ^ (cur_table_end - 1));
+ codeword &= bit - 1;
+ codeword |= bit;
+ } while (--count);
+
+ /* Advance to the next codeword length. */
+ do {
+ if (++len <= table_bits) {
+ memcpy(&decode_table[cur_table_end],
+ decode_table,
+ cur_table_end * sizeof(decode_table[0]));
+ cur_table_end <<= 1;
+ }
+ } while ((count = len_counts[len]) == 0);
+ }
+
+ /* Process codewords with len > table_bits. These require subtables. */
+ cur_table_end = 1U << table_bits;
+ subtable_prefix = -1; /* sentinel: forces a new subtable on the first pass */
+ subtable_start = 0;
+ for (;;) {
+ u32 entry;
+ unsigned i;
+ unsigned stride;
+ unsigned bit;
+
+ /*
+ * Start a new subtable if the first 'table_bits' bits of the
+ * codeword don't match the prefix of the current subtable.
+ */
+ if ((codeword & ((1U << table_bits) - 1)) != subtable_prefix) {
+ subtable_prefix = (codeword & ((1U << table_bits) - 1));
+ subtable_start = cur_table_end;
+ /*
+ * Calculate the subtable length. If the codeword has
+ * length 'table_bits + n', then the subtable needs
+ * '2^n' entries. But it may need more; if fewer than
+ * '2^n' codewords of length 'table_bits + n' remain,
+ * then the length will need to be incremented to bring
+ * in longer codewords until the subtable can be
+ * completely filled. Note that because the Huffman
+ * code is complete, it will always be possible to fill
+ * the subtable eventually.
+ */
+ subtable_bits = len - table_bits;
+ codespace_used = count;
+ while (codespace_used < (1U << subtable_bits)) {
+ subtable_bits++;
+ codespace_used = (codespace_used << 1) +
+ len_counts[table_bits + subtable_bits];
+ }
+ cur_table_end = subtable_start + (1U << subtable_bits);
+
+ /*
+ * Create the entry that points from the main table to
+ * the subtable.
+ */
+ decode_table[subtable_prefix] =
+ ((u32)subtable_start << 16) |
+ HUFFDEC_EXCEPTIONAL |
+ HUFFDEC_SUBTABLE_POINTER |
+ (subtable_bits << 8) | table_bits;
+ }
+
+ /* Fill the subtable entries for the current codeword. */
+ entry = make_decode_table_entry(decode_results, *sorted_syms++,
+ len - table_bits);
+ i = subtable_start + (codeword >> table_bits);
+ stride = 1U << (len - table_bits);
+ do {
+ decode_table[i] = entry;
+ i += stride;
+ } while (i < cur_table_end);
+
+ /* Advance to the next codeword. */
+ if (codeword == (1U << len) - 1) /* last codeword (all 1's)? */
+ return true;
+ bit = 1U << bsr32(codeword ^ ((1U << len) - 1));
+ codeword &= bit - 1;
+ codeword |= bit;
+ count--;
+ while (count == 0)
+ count = len_counts[++len]; /* skip lengths with no codewords */
+ }
+}
+
+/*
+ * Build d->u.l.precode_decode_table from the lengths in d->u.precode_lens.
+ * Returns the result of build_decode_table().
+ */
+static bool
+build_precode_decode_table(struct libdeflate_decompressor *d)
+{
+	/* TABLEBITS and ENOUGH must always be changed together! */
+	STATIC_ASSERT(PRECODE_TABLEBITS == 7);
+	STATIC_ASSERT(PRECODE_ENOUGH == 128);
+	STATIC_ASSERT(ARRAY_LEN(precode_decode_results) ==
+		      DEFLATE_NUM_PRECODE_SYMS);
+
+	return build_decode_table(d->u.l.precode_decode_table,
+				  d->u.precode_lens, DEFLATE_NUM_PRECODE_SYMS,
+				  precode_decode_results, PRECODE_TABLEBITS,
+				  DEFLATE_MAX_PRE_CODEWORD_LEN, d->sorted_syms,
+				  NULL /* table_bits_ret: keep table_bits fixed */);
+}
+
+/*
+ * Build d->u.litlen_decode_table from the first 'num_litlen_syms' lengths in
+ * d->u.l.lens, recording the table_bits actually used in d->litlen_tablebits.
+ */
+static bool
+build_litlen_decode_table(struct libdeflate_decompressor *d,
+			  unsigned num_litlen_syms, unsigned num_offset_syms)
+{
+	/* TABLEBITS and ENOUGH must always be changed together! */
+	STATIC_ASSERT(LITLEN_TABLEBITS == 11);
+	STATIC_ASSERT(LITLEN_ENOUGH == 2342);
+	STATIC_ASSERT(ARRAY_LEN(litlen_decode_results) ==
+		      DEFLATE_NUM_LITLEN_SYMS);
+
+	return build_decode_table(d->u.litlen_decode_table, d->u.l.lens,
+				  num_litlen_syms, litlen_decode_results,
+				  LITLEN_TABLEBITS,
+				  DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+				  d->sorted_syms, &d->litlen_tablebits);
+}
+
+/*
+ * Build d->offset_decode_table from the lengths after the litlen lengths.
+ */
+static bool
+build_offset_decode_table(struct libdeflate_decompressor *d,
+			  unsigned num_litlen_syms, unsigned num_offset_syms)
+{
+	/* TABLEBITS and ENOUGH must always be changed together! */
+	STATIC_ASSERT(OFFSET_TABLEBITS == 8);
+	STATIC_ASSERT(OFFSET_ENOUGH == 402);
+	STATIC_ASSERT(ARRAY_LEN(offset_decode_results) ==
+		      DEFLATE_NUM_OFFSET_SYMS);
+
+	return build_decode_table(d->offset_decode_table,
+				  d->u.l.lens + num_litlen_syms,
+				  num_offset_syms, offset_decode_results,
+				  OFFSET_TABLEBITS,
+				  DEFLATE_MAX_OFFSET_CODEWORD_LEN,
+				  d->sorted_syms, NULL);
+}
+
+/*****************************************************************************
+ * Main decompression routine
+ *****************************************************************************/
+
+typedef enum libdeflate_result (*decompress_func_t) /* same signature as FUNCNAME() */
+ (struct libdeflate_decompressor * restrict d,
+ const void * restrict in, size_t in_nbytes,
+ void * restrict out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
+
+#define FUNCNAME deflate_decompress_default /* name of the function defined below */
+#undef ATTRIBUTES /* left undefined so the #ifndef default below applies */
+#undef EXTRACT_VARBITS /* likewise */
+#undef EXTRACT_VARBITS8 /* likewise */
+/*
+ * decompress_template.h
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This is the actual DEFLATE decompression routine, lifted out of
+ * deflate_decompress.c so that it can be compiled multiple times with different
+ * target instruction sets.
+ */
+
+#ifndef ATTRIBUTES
+# define ATTRIBUTES /* default: no extra attributes on FUNCNAME() */
+#endif
+#ifndef EXTRACT_VARBITS /* default: mask 'word' with BITMASK(count) */
+# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count))
+#endif
+#ifndef EXTRACT_VARBITS8 /* default: as above, but 'count' is first truncated to u8 */
+# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count)))
+#endif
+
+static enum libdeflate_result ATTRIBUTES MAYBE_UNUSED
+FUNCNAME(struct libdeflate_decompressor * restrict d,
+ const void * restrict in, size_t in_nbytes,
+ void * restrict out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
+{
+ u8 *out_next = (u8*)out;
+ u8 * const out_end = out_next + out_nbytes_avail;
+ u8 * const out_fastloop_end =
+ out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN);
+
+ /* Input bitstream state; see deflate_decompress.c for documentation */
+ const u8 *in_next = (u8*)in;
+ const u8 * const in_end = in_next + in_nbytes;
+ const u8 * const in_fastloop_end =
+ in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ);
+ bitbuf_t bitbuf = 0;
+ bitbuf_t saved_bitbuf;
+ u32 bitsleft = 0;
+ size_t overread_count = 0;
+
+ bool is_final_block;
+ unsigned block_type;
+ unsigned num_litlen_syms;
+ unsigned num_offset_syms;
+ bitbuf_t litlen_tablemask;
+ u32 entry;
+
+next_block:
+ /* Starting to read the next block */
+ ;
+
+ STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3));
+ REFILL_BITS();
+
+ /* BFINAL: 1 bit */
+ is_final_block = bitbuf & BITMASK(1);
+
+ /* BTYPE: 2 bits */
+ block_type = (bitbuf >> 1) & BITMASK(2);
+
+ if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
+
+ /* Dynamic Huffman block */
+
+ /* The order in which precode lengths are stored */
+ static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+ };
+
+ unsigned num_explicit_precode_lens;
+ unsigned i;
+
+ /* Read the codeword length counts. */
+
+ STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5));
+ num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5));
+
+ STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5));
+ num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5));
+
+ STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4));
+ num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4));
+
+ d->static_codes_loaded = false;
+
+ /*
+ * Read the precode codeword lengths.
+ *
+ * A 64-bit bitbuffer is just one bit too small to hold the
+ * maximum number of precode lens, so to minimize branches we
+ * merge one len with the previous fields.
+ */
+ STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
+ if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
+ d->u.precode_lens[deflate_precode_lens_permutation[0]] =
+ (bitbuf >> 17) & BITMASK(3);
+ bitbuf >>= 20;
+ bitsleft -= 20;
+ REFILL_BITS();
+ i = 1;
+ do {
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] =
+ bitbuf & BITMASK(3);
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ } while (++i < num_explicit_precode_lens);
+ } else {
+ bitbuf >>= 17;
+ bitsleft -= 17;
+ i = 0;
+ do {
+ if ((u8)bitsleft < 3)
+ REFILL_BITS();
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] =
+ bitbuf & BITMASK(3);
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ } while (++i < num_explicit_precode_lens);
+ }
+ for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
+
+ /* Build the decode table for the precode. */
+ SAFETY_CHECK(build_precode_decode_table(d));
+
+ /* Decode the litlen and offset codeword lengths. */
+ i = 0;
+ do {
+ unsigned presym;
+ u8 rep_val;
+ unsigned rep_count;
+
+ if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7)
+ REFILL_BITS();
+
+ /*
+ * The code below assumes that the precode decode table
+ * doesn't have any subtables.
+ */
+ STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
+
+ /* Decode the next precode symbol. */
+ entry = d->u.l.precode_decode_table[
+ bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)];
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+ presym = entry >> 16;
+
+ if (presym < 16) {
+ /* Explicit codeword length */
+ d->u.l.lens[i++] = presym;
+ continue;
+ }
+
+ /* Run-length encoded codeword lengths */
+
+ /*
+ * Note: we don't need to immediately verify that the
+ * repeat count doesn't overflow the number of elements,
+ * since we've sized the lens array to have enough extra
+ * space to allow for the worst-case overrun (138 zeroes
+ * when only 1 length was remaining).
+ *
+ * In the case of the small repeat counts (presyms 16
+ * and 17), it is fastest to always write the maximum
+ * number of entries. That gets rid of branches that
+ * would otherwise be required.
+ *
+ * It is not just because of the numerical order that
+ * our checks go in the order 'presym < 16', 'presym ==
+ * 16', and 'presym == 17'. For typical data this is
+ * ordered from most frequent to least frequent case.
+ */
+ STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
+
+ if (presym == 16) {
+ /* Repeat the previous length 3 - 6 times. */
+ SAFETY_CHECK(i != 0);
+ rep_val = d->u.l.lens[i - 1];
+ STATIC_ASSERT(3 + BITMASK(2) == 6);
+ rep_count = 3 + (bitbuf & BITMASK(2));
+ bitbuf >>= 2;
+ bitsleft -= 2;
+ d->u.l.lens[i + 0] = rep_val;
+ d->u.l.lens[i + 1] = rep_val;
+ d->u.l.lens[i + 2] = rep_val;
+ d->u.l.lens[i + 3] = rep_val;
+ d->u.l.lens[i + 4] = rep_val;
+ d->u.l.lens[i + 5] = rep_val;
+ i += rep_count;
+ } else if (presym == 17) {
+ /* Repeat zero 3 - 10 times. */
+ STATIC_ASSERT(3 + BITMASK(3) == 10);
+ rep_count = 3 + (bitbuf & BITMASK(3));
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ d->u.l.lens[i + 0] = 0;
+ d->u.l.lens[i + 1] = 0;
+ d->u.l.lens[i + 2] = 0;
+ d->u.l.lens[i + 3] = 0;
+ d->u.l.lens[i + 4] = 0;
+ d->u.l.lens[i + 5] = 0;
+ d->u.l.lens[i + 6] = 0;
+ d->u.l.lens[i + 7] = 0;
+ d->u.l.lens[i + 8] = 0;
+ d->u.l.lens[i + 9] = 0;
+ i += rep_count;
+ } else {
+ /* Repeat zero 11 - 138 times. */
+ STATIC_ASSERT(11 + BITMASK(7) == 138);
+ rep_count = 11 + (bitbuf & BITMASK(7));
+ bitbuf >>= 7;
+ bitsleft -= 7;
+ memset(&d->u.l.lens[i], 0,
+ rep_count * sizeof(d->u.l.lens[i]));
+ i += rep_count;
+ }
+ } while (i < num_litlen_syms + num_offset_syms);
+
+ /* Unnecessary, but check this for consistency with zlib. */
+ SAFETY_CHECK(i == num_litlen_syms + num_offset_syms);
+
+ } else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
+ u16 len, nlen;
+
+ /*
+ * Uncompressed block: copy 'len' bytes literally from the input
+ * buffer to the output buffer.
+ */
+
+ bitsleft -= 3; /* for BTYPE and BFINAL */
+
+ /*
+ * Align the bitstream to the next byte boundary. This means
+ * the next byte boundary as if we were reading a byte at a
+ * time. Therefore, we have to rewind 'in_next' by any bytes
+ * that have been refilled but not actually consumed yet (not
+ * counting overread bytes, which don't increment 'in_next').
+ */
+ bitsleft = (u8)bitsleft;
+ SAFETY_CHECK(overread_count <= (bitsleft >> 3));
+ in_next -= (bitsleft >> 3) - overread_count;
+ overread_count = 0;
+ bitbuf = 0;
+ bitsleft = 0;
+
+ SAFETY_CHECK(in_end - in_next >= 4);
+ len = get_unaligned_le16(in_next);
+ nlen = get_unaligned_le16(in_next + 2);
+ in_next += 4;
+
+ SAFETY_CHECK(len == (u16)~nlen);
+ if (unlikely(len > out_end - out_next))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+ SAFETY_CHECK(len <= in_end - in_next);
+
+ memcpy(out_next, in_next, len);
+ in_next += len;
+ out_next += len;
+
+ goto block_done;
+
+ } else {
+ unsigned i;
+
+ SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
+
+ /*
+ * Static Huffman block: build the decode tables for the static
+ * codes. Skip doing so if the tables are already set up from
+ * an earlier static block; this speeds up decompression of
+ * degenerate input of many empty or very short static blocks.
+ *
+ * Afterwards, the remainder is the same as decompressing a
+ * dynamic Huffman block.
+ */
+
+ bitbuf >>= 3; /* for BTYPE and BFINAL */
+ bitsleft -= 3;
+
+ if (d->static_codes_loaded)
+ goto have_decode_tables;
+
+ d->static_codes_loaded = true;
+
+ STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
+ STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
+
+ for (i = 0; i < 144; i++)
+ d->u.l.lens[i] = 8;
+ for (; i < 256; i++)
+ d->u.l.lens[i] = 9;
+ for (; i < 280; i++)
+ d->u.l.lens[i] = 7;
+ for (; i < 288; i++)
+ d->u.l.lens[i] = 8;
+
+ for (; i < 288 + 32; i++)
+ d->u.l.lens[i] = 5;
+
+ num_litlen_syms = 288;
+ num_offset_syms = 32;
+ }
+
+ /* Decompressing a Huffman block (either dynamic or static) */
+
+ SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
+ SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
+have_decode_tables:
+ litlen_tablemask = BITMASK(d->litlen_tablebits);
+
+ /*
+ * This is the "fastloop" for decoding literals and matches. It does
+ * bounds checks on in_next and out_next in the loop conditions so that
+ * additional bounds checks aren't needed inside the loop body.
+ *
+ * To reduce latency, the bitbuffer is refilled and the next litlen
+ * decode table entry is preloaded before each loop iteration.
+ */
+ if (in_next >= in_fastloop_end || out_next >= out_fastloop_end)
+ goto generic_loop;
+ REFILL_BITS_IN_FASTLOOP();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ do {
+ u32 length, offset, lit;
+ const u8 *src;
+ u8 *dst;
+
+ /*
+ * Consume the bits for the litlen decode table entry. Save the
+ * original bitbuf for later, in case the extra match length
+ * bits need to be extracted from it.
+ */
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+
+ /*
+ * Begin by checking for a "fast" literal, i.e. a literal that
+ * doesn't need a subtable.
+ */
+ if (entry & HUFFDEC_LITERAL) {
+ /*
+ * On 64-bit platforms, we decode up to 2 extra fast
+ * literals in addition to the primary item, as this
+ * increases performance and still leaves enough bits
+ * remaining for what follows. We could actually do 3,
+ * assuming LITLEN_TABLEBITS=11, but that actually
+ * decreases performance slightly (perhaps by messing
+ * with the branch prediction of the conditional refill
+ * that happens later while decoding the match offset).
+ *
+ * Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN
+ * and FASTLOOP_MAX_BYTES_READ need to be updated if the
+ * number of extra literals decoded here is changed.
+ */
+ if (/* enough bits for 2 fast literals + length + offset preload? */
+ CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
+ LENGTH_MAXBITS,
+ OFFSET_TABLEBITS) &&
+ /* enough bits for 2 fast literals + slow literal + litlen preload? */
+ CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
+ DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+ LITLEN_TABLEBITS)) {
+ /* 1st extra fast literal */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ *out_next++ = lit;
+ if (entry & HUFFDEC_LITERAL) {
+ /* 2nd extra fast literal */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ *out_next++ = lit;
+ if (entry & HUFFDEC_LITERAL) {
+ /*
+ * Another fast literal, but
+ * this one is in lieu of the
+ * primary item, so it doesn't
+ * count as one of the extras.
+ */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ }
+ } else {
+ /*
+ * Decode a literal. While doing so, preload
+ * the next litlen decode table entry and refill
+ * the bitbuffer. To reduce latency, we've
+ * arranged for there to be enough "preloadable"
+ * bits remaining to do the table preload
+ * independently of the refill.
+ */
+ STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(
+ LITLEN_TABLEBITS, LITLEN_TABLEBITS));
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ }
+
+ /*
+ * It's not a literal entry, so it can be a length entry, a
+ * subtable pointer entry, or an end-of-block entry. Detect the
+ * two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag.
+ */
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Subtable pointer or end-of-block entry */
+
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+
+ /*
+ * A subtable is required. Load and consume the
+ * subtable entry. The subtable entry can be of any
+ * type: literal, length, or end-of-block.
+ */
+ entry = d->u.litlen_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+
+ /*
+ * 32-bit platforms that use the byte-at-a-time refill
+ * method have to do a refill here for there to always
+ * be enough bits to decode a literal that requires a
+ * subtable, then preload the next litlen decode table
+ * entry; or to decode a match length that requires a
+ * subtable, then preload the offset decode table entry.
+ */
+ if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+ LITLEN_TABLEBITS) ||
+ !CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS,
+ OFFSET_TABLEBITS))
+ REFILL_BITS_IN_FASTLOOP();
+ if (entry & HUFFDEC_LITERAL) {
+ /* Decode a literal that required a subtable. */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+ /* Else, it's a length that required a subtable. */
+ }
+
+ /*
+ * Decode the match length: the length base value associated
+ * with the litlen symbol (which we extract from the decode
+ * table entry), plus the extra length bits. We don't need to
+ * consume the extra length bits here, as they were included in
+ * the bits consumed by the entry earlier. We also don't need
+ * to check for too-long matches here, as this is inside the
+ * fastloop where it's already been verified that the output
+ * buffer has enough space remaining to copy a max-length match.
+ */
+ length = entry >> 16;
+ length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+
+ /*
+ * Decode the match offset. There are enough "preloadable" bits
+ * remaining to preload the offset decode table entry, but a
+ * refill might be needed before consuming it.
+ */
+ STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS,
+ OFFSET_TABLEBITS));
+ entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
+ if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS,
+ LITLEN_TABLEBITS)) {
+ /*
+ * Decoding a match offset on a 64-bit platform. We may
+ * need to refill once, but then we can decode the whole
+ * offset and preload the next litlen table entry.
+ */
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Offset codeword requires a subtable */
+ if (unlikely((u8)bitsleft < OFFSET_MAXBITS +
+ LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ } else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS +
+ LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ } else {
+ /* Decoding a match offset on a 32-bit platform */
+ REFILL_BITS_IN_FASTLOOP();
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Offset codeword requires a subtable */
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ REFILL_BITS_IN_FASTLOOP();
+ /* No further refill needed before extra bits */
+ STATIC_ASSERT(CAN_CONSUME(
+ OFFSET_MAXBITS - OFFSET_TABLEBITS));
+ } else {
+ /* No refill needed before extra bits */
+ STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS));
+ }
+ }
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+ offset = entry >> 16;
+ offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+
+ /* Validate the match offset; needed even in the fastloop. */
+ SAFETY_CHECK(offset <= out_next - (const u8 *)out);
+ src = out_next - offset;
+ dst = out_next;
+ out_next += length;
+
+ /*
+ * Before starting to issue the instructions to copy the match,
+ * refill the bitbuffer and preload the litlen decode table
+ * entry for the next loop iteration. This can increase
+ * performance by allowing the latency of the match copy to
+ * overlap with these other operations. To further reduce
+ * latency, we've arranged for there to be enough bits remaining
+ * to do the table preload independently of the refill, except
+ * on 32-bit platforms using the byte-at-a-time refill method.
+ */
+ if (!CAN_CONSUME_AND_THEN_PRELOAD(
+ MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS,
+ OFFSET_MAXFASTBITS),
+ LITLEN_TABLEBITS) &&
+ unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+
+ /*
+ * Copy the match. On most CPUs the fastest method is a
+ * word-at-a-time copy, unconditionally copying about 5 words
+ * since this is enough for most matches without being too much.
+ *
+ * The normal word-at-a-time copy works for offset >= WORDBYTES,
+ * which is most cases. The case of offset == 1 is also common
+ * and is worth optimizing for, since it is just RLE encoding of
+ * the previous byte, which is the result of compressing long
+ * runs of the same byte.
+ *
+ * Writing past the match 'length' is allowed here, since it's
+ * been ensured there is enough output space left for a slight
+ * overrun. FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if
+ * the maximum possible overrun here is changed.
+ */
+ if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ while (dst < out_next) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ }
+ } else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) {
+ machine_word_t v;
+
+ /*
+ * This part tends to get auto-vectorized, so keep it
+ * copying a multiple of 16 bytes at a time.
+ */
+ v = (machine_word_t)0x0101010101010101 * src[0];
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ while (dst < out_next) {
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ }
+ } else if (UNALIGNED_ACCESS_IS_FAST) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ do {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ } while (dst < out_next);
+ } else {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ do {
+ *dst++ = *src++;
+ } while (dst < out_next);
+ }
+ } while (in_next < in_fastloop_end && out_next < out_fastloop_end);
+
+ /*
+ * This is the generic loop for decoding literals and matches. This
+ * handles cases where in_next and out_next are close to the end of
+ * their respective buffers. Usually this loop isn't performance-
+ * critical, as most time is spent in the fastloop above instead. We
+ * therefore omit some optimizations here in favor of smaller code.
+ */
+generic_loop:
+ for (;;) {
+ u32 length, offset;
+ const u8 *src;
+ u8 *dst;
+
+ REFILL_BITS();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) {
+ entry = d->u.litlen_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ }
+ length = entry >> 16;
+ if (entry & HUFFDEC_LITERAL) {
+ if (unlikely(out_next == out_end))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+ *out_next++ = length;
+ continue;
+ }
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+ length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+ if (unlikely(length > out_end - out_next))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+
+ if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS))
+ REFILL_BITS();
+ entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ if (!CAN_CONSUME(OFFSET_MAXBITS))
+ REFILL_BITS();
+ }
+ offset = entry >> 16;
+ offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8);
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+
+ SAFETY_CHECK(offset <= out_next - (const u8 *)out);
+ src = out_next - offset;
+ dst = out_next;
+ out_next += length;
+
+ STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
+ *dst++ = *src++;
+ *dst++ = *src++;
+ do {
+ *dst++ = *src++;
+ } while (dst < out_next);
+ }
+
+block_done:
+ /* Finished decoding a block */
+
+ if (!is_final_block)
+ goto next_block;
+
+ /* That was the last block. */
+
+ bitsleft = (u8)bitsleft;
+
+ /*
+ * If any of the implicit appended zero bytes were consumed (not just
+ * refilled) before hitting end of stream, then the data is bad.
+ */
+ SAFETY_CHECK(overread_count <= (bitsleft >> 3));
+
+ /* Optionally return the actual number of bytes consumed. */
+ if (actual_in_nbytes_ret) {
+ /* Don't count bytes that were refilled but not consumed. */
+ in_next -= (bitsleft >> 3) - overread_count;
+
+ *actual_in_nbytes_ret = in_next - (u8 *)in;
+ }
+
+ /* Optionally return the actual number of bytes written. */
+ if (actual_out_nbytes_ret) {
+ *actual_out_nbytes_ret = out_next - (u8 *)out;
+ } else {
+ if (out_next != out_end)
+ return LIBDEFLATE_SHORT_OUTPUT;
+ }
+ return LIBDEFLATE_SUCCESS;
+}
+
+#undef FUNCNAME
+#undef ATTRIBUTES
+#undef EXTRACT_VARBITS
+#undef EXTRACT_VARBITS8
+
+
+/* Include architecture-specific implementation(s) if available. */
+#undef DEFAULT_IMPL
+#undef arch_select_decompress_func
+#if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+#ifndef LIB_X86_DECOMPRESS_IMPL_H
+#define LIB_X86_DECOMPRESS_IMPL_H
+
+/*
+ * BMI2 optimized version
+ *
+ * FIXME: with MSVC, this isn't actually compiled with BMI2 code generation
+ * enabled yet. That would require that this be moved to its own .c file.
+ */
+#if HAVE_BMI2_INTRIN
+# define deflate_decompress_bmi2 deflate_decompress_bmi2
+# define FUNCNAME deflate_decompress_bmi2
+# if !HAVE_BMI2_NATIVE
+# define ATTRIBUTES _target_attribute("bmi2")
+# endif
+ /*
+ * Even with __attribute__((target("bmi2"))), gcc doesn't reliably use the
+ * bzhi instruction for 'word & BITMASK(count)'. So use the bzhi intrinsic
+ * explicitly. EXTRACT_VARBITS() is equivalent to 'word & BITMASK(count)';
+ * EXTRACT_VARBITS8() is equivalent to 'word & BITMASK((u8)count)'.
+ * Nevertheless, their implementation using the bzhi intrinsic is identical,
+ * as the bzhi instruction truncates the count to 8 bits implicitly.
+ */
+# ifndef __clang__ /* non-clang compilers: use the bzhi intrinsic explicitly */
+# include <immintrin.h> /* declares _bzhi_u32() / _bzhi_u64() */
+# ifdef ARCH_X86_64
+# define EXTRACT_VARBITS(word, count) _bzhi_u64((word), (count))
+# define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count))
+# else /* !ARCH_X86_64 */
+# define EXTRACT_VARBITS(word, count) _bzhi_u32((word), (count))
+# define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count))
+# endif /* ARCH_X86_64 */
+# endif /* !__clang__ */
+/*
+ * decompress_template.h
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This is the actual DEFLATE decompression routine, lifted out of
+ * deflate_decompress.c so that it can be compiled multiple times with different
+ * target instruction sets.
+ */
+
+#ifndef ATTRIBUTES /* includer supplied no function attributes */
+# define ATTRIBUTES /* default: expands to nothing */
+#endif /* !ATTRIBUTES */
+#ifndef EXTRACT_VARBITS /* includer supplied no override */
+# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count)) /* default: plain mask with BITMASK(count) */
+#endif /* !EXTRACT_VARBITS */
+#ifndef EXTRACT_VARBITS8 /* includer supplied no override */
+# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count))) /* default: count is first truncated to u8 */
+#endif /* !EXTRACT_VARBITS8 */
+
+static enum libdeflate_result ATTRIBUTES MAYBE_UNUSED
+FUNCNAME(struct libdeflate_decompressor * restrict d,
+ const void * restrict in, size_t in_nbytes,
+ void * restrict out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
+{
+ u8 *out_next = (u8*)out;
+ u8 * const out_end = out_next + out_nbytes_avail;
+ u8 * const out_fastloop_end =
+ out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN);
+
+ /* Input bitstream state; see deflate_decompress.c for documentation */
+ const u8 *in_next = (u8*)in;
+ const u8 * const in_end = in_next + in_nbytes;
+ const u8 * const in_fastloop_end =
+ in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ);
+ bitbuf_t bitbuf = 0;
+ bitbuf_t saved_bitbuf;
+ u32 bitsleft = 0;
+ size_t overread_count = 0;
+
+ bool is_final_block;
+ unsigned block_type;
+ unsigned num_litlen_syms;
+ unsigned num_offset_syms;
+ bitbuf_t litlen_tablemask;
+ u32 entry;
+
+next_block:
+ /* Starting to read the next block */
+ ;
+
+ STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3));
+ REFILL_BITS();
+
+ /* BFINAL: 1 bit */
+ is_final_block = bitbuf & BITMASK(1);
+
+ /* BTYPE: 2 bits */
+ block_type = (bitbuf >> 1) & BITMASK(2);
+
+ if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
+
+ /* Dynamic Huffman block */
+
+ /* The order in which precode lengths are stored */
+ static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+ };
+
+ unsigned num_explicit_precode_lens;
+ unsigned i;
+
+ /* Read the codeword length counts. */
+
+ STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5));
+ num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5));
+
+ STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5));
+ num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5));
+
+ STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4));
+ num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4));
+
+ d->static_codes_loaded = false;
+
+ /*
+ * Read the precode codeword lengths.
+ *
+ * A 64-bit bitbuffer is just one bit too small to hold the
+ * maximum number of precode lens, so to minimize branches we
+ * merge one len with the previous fields.
+ */
+ STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
+ if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
+ d->u.precode_lens[deflate_precode_lens_permutation[0]] =
+ (bitbuf >> 17) & BITMASK(3);
+ bitbuf >>= 20;
+ bitsleft -= 20;
+ REFILL_BITS();
+ i = 1;
+ do {
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] =
+ bitbuf & BITMASK(3);
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ } while (++i < num_explicit_precode_lens);
+ } else {
+ bitbuf >>= 17;
+ bitsleft -= 17;
+ i = 0;
+ do {
+ if ((u8)bitsleft < 3)
+ REFILL_BITS();
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] =
+ bitbuf & BITMASK(3);
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ } while (++i < num_explicit_precode_lens);
+ }
+ for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
+ d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
+
+ /* Build the decode table for the precode. */
+ SAFETY_CHECK(build_precode_decode_table(d));
+
+ /* Decode the litlen and offset codeword lengths. */
+ i = 0;
+ do {
+ unsigned presym;
+ u8 rep_val;
+ unsigned rep_count;
+
+ if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7)
+ REFILL_BITS();
+
+ /*
+ * The code below assumes that the precode decode table
+ * doesn't have any subtables.
+ */
+ STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
+
+ /* Decode the next precode symbol. */
+ entry = d->u.l.precode_decode_table[
+ bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)];
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+ presym = entry >> 16;
+
+ if (presym < 16) {
+ /* Explicit codeword length */
+ d->u.l.lens[i++] = presym;
+ continue;
+ }
+
+ /* Run-length encoded codeword lengths */
+
+ /*
+ * Note: we don't need to immediately verify that the
+ * repeat count doesn't overflow the number of elements,
+ * since we've sized the lens array to have enough extra
+ * space to allow for the worst-case overrun (138 zeroes
+ * when only 1 length was remaining).
+ *
+ * In the case of the small repeat counts (presyms 16
+ * and 17), it is fastest to always write the maximum
+ * number of entries. That gets rid of branches that
+ * would otherwise be required.
+ *
+ * It is not just because of the numerical order that
+ * our checks go in the order 'presym < 16', 'presym ==
+ * 16', and 'presym == 17'. For typical data this is
+ * ordered from most frequent to least frequent case.
+ */
+ STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
+
+ if (presym == 16) {
+ /* Repeat the previous length 3 - 6 times. */
+ SAFETY_CHECK(i != 0);
+ rep_val = d->u.l.lens[i - 1];
+ STATIC_ASSERT(3 + BITMASK(2) == 6);
+ rep_count = 3 + (bitbuf & BITMASK(2));
+ bitbuf >>= 2;
+ bitsleft -= 2;
+ d->u.l.lens[i + 0] = rep_val;
+ d->u.l.lens[i + 1] = rep_val;
+ d->u.l.lens[i + 2] = rep_val;
+ d->u.l.lens[i + 3] = rep_val;
+ d->u.l.lens[i + 4] = rep_val;
+ d->u.l.lens[i + 5] = rep_val;
+ i += rep_count;
+ } else if (presym == 17) {
+ /* Repeat zero 3 - 10 times. */
+ STATIC_ASSERT(3 + BITMASK(3) == 10);
+ rep_count = 3 + (bitbuf & BITMASK(3));
+ bitbuf >>= 3;
+ bitsleft -= 3;
+ d->u.l.lens[i + 0] = 0;
+ d->u.l.lens[i + 1] = 0;
+ d->u.l.lens[i + 2] = 0;
+ d->u.l.lens[i + 3] = 0;
+ d->u.l.lens[i + 4] = 0;
+ d->u.l.lens[i + 5] = 0;
+ d->u.l.lens[i + 6] = 0;
+ d->u.l.lens[i + 7] = 0;
+ d->u.l.lens[i + 8] = 0;
+ d->u.l.lens[i + 9] = 0;
+ i += rep_count;
+ } else {
+ /* Repeat zero 11 - 138 times. */
+ STATIC_ASSERT(11 + BITMASK(7) == 138);
+ rep_count = 11 + (bitbuf & BITMASK(7));
+ bitbuf >>= 7;
+ bitsleft -= 7;
+ memset(&d->u.l.lens[i], 0,
+ rep_count * sizeof(d->u.l.lens[i]));
+ i += rep_count;
+ }
+ } while (i < num_litlen_syms + num_offset_syms);
+
+ /* Unnecessary, but check this for consistency with zlib. */
+ SAFETY_CHECK(i == num_litlen_syms + num_offset_syms);
+
+ } else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
+ u16 len, nlen;
+
+ /*
+ * Uncompressed block: copy 'len' bytes literally from the input
+ * buffer to the output buffer.
+ */
+
+ bitsleft -= 3; /* for BTYPE and BFINAL */
+
+ /*
+ * Align the bitstream to the next byte boundary. This means
+ * the next byte boundary as if we were reading a byte at a
+ * time. Therefore, we have to rewind 'in_next' by any bytes
+ * that have been refilled but not actually consumed yet (not
+ * counting overread bytes, which don't increment 'in_next').
+ */
+ bitsleft = (u8)bitsleft;
+ SAFETY_CHECK(overread_count <= (bitsleft >> 3));
+ in_next -= (bitsleft >> 3) - overread_count;
+ overread_count = 0;
+ bitbuf = 0;
+ bitsleft = 0;
+
+ SAFETY_CHECK(in_end - in_next >= 4);
+ len = get_unaligned_le16(in_next);
+ nlen = get_unaligned_le16(in_next + 2);
+ in_next += 4;
+
+ SAFETY_CHECK(len == (u16)~nlen);
+ if (unlikely(len > out_end - out_next))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+ SAFETY_CHECK(len <= in_end - in_next);
+
+ memcpy(out_next, in_next, len);
+ in_next += len;
+ out_next += len;
+
+ goto block_done;
+
+ } else {
+ unsigned i;
+
+ SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
+
+ /*
+ * Static Huffman block: build the decode tables for the static
+ * codes. Skip doing so if the tables are already set up from
+ * an earlier static block; this speeds up decompression of
+ * degenerate input of many empty or very short static blocks.
+ *
+ * Afterwards, the remainder is the same as decompressing a
+ * dynamic Huffman block.
+ */
+
+ bitbuf >>= 3; /* for BTYPE and BFINAL */
+ bitsleft -= 3;
+
+ if (d->static_codes_loaded)
+ goto have_decode_tables;
+
+ d->static_codes_loaded = true;
+
+ STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
+ STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
+
+ for (i = 0; i < 144; i++)
+ d->u.l.lens[i] = 8;
+ for (; i < 256; i++)
+ d->u.l.lens[i] = 9;
+ for (; i < 280; i++)
+ d->u.l.lens[i] = 7;
+ for (; i < 288; i++)
+ d->u.l.lens[i] = 8;
+
+ for (; i < 288 + 32; i++)
+ d->u.l.lens[i] = 5;
+
+ num_litlen_syms = 288;
+ num_offset_syms = 32;
+ }
+
+ /* Decompressing a Huffman block (either dynamic or static) */
+
+ SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
+ SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
+have_decode_tables:
+ litlen_tablemask = BITMASK(d->litlen_tablebits);
+
+ /*
+ * This is the "fastloop" for decoding literals and matches. It does
+ * bounds checks on in_next and out_next in the loop conditions so that
+ * additional bounds checks aren't needed inside the loop body.
+ *
+ * To reduce latency, the bitbuffer is refilled and the next litlen
+ * decode table entry is preloaded before each loop iteration.
+ */
+ if (in_next >= in_fastloop_end || out_next >= out_fastloop_end)
+ goto generic_loop;
+ REFILL_BITS_IN_FASTLOOP();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ do {
+ u32 length, offset, lit;
+ const u8 *src;
+ u8 *dst;
+
+ /*
+ * Consume the bits for the litlen decode table entry. Save the
+ * original bitbuf for later, in case the extra match length
+ * bits need to be extracted from it.
+ */
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+
+ /*
+ * Begin by checking for a "fast" literal, i.e. a literal that
+ * doesn't need a subtable.
+ */
+ if (entry & HUFFDEC_LITERAL) {
+ /*
+ * On 64-bit platforms, we decode up to 2 extra fast
+ * literals in addition to the primary item, as this
+ * increases performance and still leaves enough bits
+ * remaining for what follows. We could actually do 3,
+ * assuming LITLEN_TABLEBITS=11, but that actually
+ * decreases performance slightly (perhaps by messing
+ * with the branch prediction of the conditional refill
+ * that happens later while decoding the match offset).
+ *
+ * Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN
+ * and FASTLOOP_MAX_BYTES_READ need to be updated if the
+ * number of extra literals decoded here is changed.
+ */
+ if (/* enough bits for 2 fast literals + length + offset preload? */
+ CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
+ LENGTH_MAXBITS,
+ OFFSET_TABLEBITS) &&
+ /* enough bits for 2 fast literals + slow literal + litlen preload? */
+ CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
+ DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+ LITLEN_TABLEBITS)) {
+ /* 1st extra fast literal */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ *out_next++ = lit;
+ if (entry & HUFFDEC_LITERAL) {
+ /* 2nd extra fast literal */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ *out_next++ = lit;
+ if (entry & HUFFDEC_LITERAL) {
+ /*
+ * Another fast literal, but
+ * this one is in lieu of the
+ * primary item, so it doesn't
+ * count as one of the extras.
+ */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ }
+ } else {
+ /*
+ * Decode a literal. While doing so, preload
+ * the next litlen decode table entry and refill
+ * the bitbuffer. To reduce latency, we've
+ * arranged for there to be enough "preloadable"
+ * bits remaining to do the table preload
+ * independently of the refill.
+ */
+ STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(
+ LITLEN_TABLEBITS, LITLEN_TABLEBITS));
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ }
+
+ /*
+ * It's not a literal entry, so it can be a length entry, a
+ * subtable pointer entry, or an end-of-block entry. Detect the
+ * two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag.
+ */
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Subtable pointer or end-of-block entry */
+
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+
+ /*
+ * A subtable is required. Load and consume the
+ * subtable entry. The subtable entry can be of any
+ * type: literal, length, or end-of-block.
+ */
+ entry = d->u.litlen_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+
+ /*
+ * 32-bit platforms that use the byte-at-a-time refill
+ * method have to do a refill here for there to always
+ * be enough bits to decode a literal that requires a
+ * subtable, then preload the next litlen decode table
+ * entry; or to decode a match length that requires a
+ * subtable, then preload the offset decode table entry.
+ */
+ if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+ LITLEN_TABLEBITS) ||
+ !CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS,
+ OFFSET_TABLEBITS))
+ REFILL_BITS_IN_FASTLOOP();
+ if (entry & HUFFDEC_LITERAL) {
+ /* Decode a literal that required a subtable. */
+ lit = entry >> 16;
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+ *out_next++ = lit;
+ continue;
+ }
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+ /* Else, it's a length that required a subtable. */
+ }
+
+ /*
+ * Decode the match length: the length base value associated
+ * with the litlen symbol (which we extract from the decode
+ * table entry), plus the extra length bits. We don't need to
+ * consume the extra length bits here, as they were included in
+ * the bits consumed by the entry earlier. We also don't need
+ * to check for too-long matches here, as this is inside the
+ * fastloop where it's already been verified that the output
+ * buffer has enough space remaining to copy a max-length match.
+ */
+ length = entry >> 16;
+ length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+
+ /*
+ * Decode the match offset. There are enough "preloadable" bits
+ * remaining to preload the offset decode table entry, but a
+ * refill might be needed before consuming it.
+ */
+ STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS,
+ OFFSET_TABLEBITS));
+ entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
+ if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS,
+ LITLEN_TABLEBITS)) {
+ /*
+ * Decoding a match offset on a 64-bit platform. We may
+ * need to refill once, but then we can decode the whole
+ * offset and preload the next litlen table entry.
+ */
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Offset codeword requires a subtable */
+ if (unlikely((u8)bitsleft < OFFSET_MAXBITS +
+ LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ } else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS +
+ LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ } else {
+ /* Decoding a match offset on a 32-bit platform */
+ REFILL_BITS_IN_FASTLOOP();
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ /* Offset codeword requires a subtable */
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ REFILL_BITS_IN_FASTLOOP();
+ /* No further refill needed before extra bits */
+ STATIC_ASSERT(CAN_CONSUME(
+ OFFSET_MAXBITS - OFFSET_TABLEBITS));
+ } else {
+ /* No refill needed before extra bits */
+ STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS));
+ }
+ }
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry; /* optimization: subtract full entry */
+ offset = entry >> 16;
+ offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+
+ /* Validate the match offset; needed even in the fastloop. */
+ SAFETY_CHECK(offset <= out_next - (const u8 *)out);
+ src = out_next - offset;
+ dst = out_next;
+ out_next += length;
+
+ /*
+ * Before starting to issue the instructions to copy the match,
+ * refill the bitbuffer and preload the litlen decode table
+ * entry for the next loop iteration. This can increase
+ * performance by allowing the latency of the match copy to
+ * overlap with these other operations. To further reduce
+ * latency, we've arranged for there to be enough bits remaining
+ * to do the table preload independently of the refill, except
+ * on 32-bit platforms using the byte-at-a-time refill method.
+ */
+ if (!CAN_CONSUME_AND_THEN_PRELOAD(
+ MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS,
+ OFFSET_MAXFASTBITS),
+ LITLEN_TABLEBITS) &&
+ unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK))
+ REFILL_BITS_IN_FASTLOOP();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ REFILL_BITS_IN_FASTLOOP();
+
+ /*
+ * Copy the match. On most CPUs the fastest method is a
+ * word-at-a-time copy, unconditionally copying about 5 words
+ * since this is enough for most matches without being too much.
+ *
+ * The normal word-at-a-time copy works for offset >= WORDBYTES,
+ * which is most cases. The case of offset == 1 is also common
+ * and is worth optimizing for, since it is just RLE encoding of
+ * the previous byte, which is the result of compressing long
+ * runs of the same byte.
+ *
+ * Writing past the match 'length' is allowed here, since it's
+ * been ensured there is enough output space left for a slight
+ * overrun. FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if
+ * the maximum possible overrun here is changed.
+ */
+ if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ while (dst < out_next) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ }
+ } else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) {
+ machine_word_t v;
+
+ /*
+ * This part tends to get auto-vectorized, so keep it
+ * copying a multiple of 16 bytes at a time.
+ */
+ v = (machine_word_t)0x0101010101010101 * src[0];
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ while (dst < out_next) {
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ store_word_unaligned(v, dst);
+ dst += WORDBYTES;
+ }
+ } else if (UNALIGNED_ACCESS_IS_FAST) {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ do {
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ store_word_unaligned(load_word_unaligned(src), dst);
+ src += offset;
+ dst += offset;
+ } while (dst < out_next);
+ } else {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ do {
+ *dst++ = *src++;
+ } while (dst < out_next);
+ }
+ } while (in_next < in_fastloop_end && out_next < out_fastloop_end);
+
+ /*
+ * This is the generic loop for decoding literals and matches. This
+ * handles cases where in_next and out_next are close to the end of
+ * their respective buffers. Usually this loop isn't performance-
+ * critical, as most time is spent in the fastloop above instead. We
+ * therefore omit some optimizations here in favor of smaller code.
+ */
+generic_loop:
+ for (;;) {
+ u32 length, offset;
+ const u8 *src;
+ u8 *dst;
+
+ REFILL_BITS();
+ entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) {
+ entry = d->u.litlen_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ saved_bitbuf = bitbuf;
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+ }
+ length = entry >> 16;
+ if (entry & HUFFDEC_LITERAL) {
+ if (unlikely(out_next == out_end))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+ *out_next++ = length;
+ continue;
+ }
+ if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
+ goto block_done;
+ length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
+ if (unlikely(length > out_end - out_next))
+ return LIBDEFLATE_INSUFFICIENT_SPACE;
+
+ if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS))
+ REFILL_BITS();
+ entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
+ if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
+ bitbuf >>= OFFSET_TABLEBITS;
+ bitsleft -= OFFSET_TABLEBITS;
+ entry = d->offset_decode_table[(entry >> 16) +
+ EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
+ if (!CAN_CONSUME(OFFSET_MAXBITS))
+ REFILL_BITS();
+ }
+ offset = entry >> 16;
+ offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8);
+ bitbuf >>= (u8)entry;
+ bitsleft -= entry;
+
+ SAFETY_CHECK(offset <= out_next - (const u8 *)out);
+ src = out_next - offset;
+ dst = out_next;
+ out_next += length;
+
+ STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
+ *dst++ = *src++;
+ *dst++ = *src++;
+ do {
+ *dst++ = *src++;
+ } while (dst < out_next);
+ }
+
+block_done:
+ /* Finished decoding a block */
+
+ if (!is_final_block)
+ goto next_block;
+
+ /* That was the last block. */
+
+ bitsleft = (u8)bitsleft;
+
+ /*
+ * If any of the implicit appended zero bytes were consumed (not just
+ * refilled) before hitting end of stream, then the data is bad.
+ */
+ SAFETY_CHECK(overread_count <= (bitsleft >> 3));
+
+ /* Optionally return the actual number of bytes consumed. */
+ if (actual_in_nbytes_ret) {
+ /* Don't count bytes that were refilled but not consumed. */
+ in_next -= (bitsleft >> 3) - overread_count;
+
+ *actual_in_nbytes_ret = in_next - (u8 *)in;
+ }
+
+ /* Optionally return the actual number of bytes written. */
+ if (actual_out_nbytes_ret) {
+ *actual_out_nbytes_ret = out_next - (u8 *)out;
+ } else {
+ if (out_next != out_end)
+ return LIBDEFLATE_SHORT_OUTPUT;
+ }
+ return LIBDEFLATE_SUCCESS;
+}
+
+#undef FUNCNAME
+#undef ATTRIBUTES
+#undef EXTRACT_VARBITS
+#undef EXTRACT_VARBITS8
+
+#endif /* HAVE_BMI2_INTRIN */
+
+#if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE
+#define DEFAULT_IMPL deflate_decompress_bmi2
+#else
+/*
+ * Pick an x86-specific decompression routine at runtime.  Yields the BMI2
+ * implementation when it was compiled in and HAVE_BMI2() accepts the current
+ * CPU feature mask; otherwise yields NULL.
+ */
+static inline decompress_func_t
+arch_select_decompress_func(void)
+{
+	decompress_func_t chosen = NULL;
+
+#ifdef deflate_decompress_bmi2
+	if (HAVE_BMI2(get_x86_cpu_features()))
+		chosen = deflate_decompress_bmi2;
+#endif
+	return chosen;
+}
+#define arch_select_decompress_func arch_select_decompress_func
+#endif
+
+#endif /* LIB_X86_DECOMPRESS_IMPL_H */
+
+#endif
+
+#ifndef DEFAULT_IMPL
+# define DEFAULT_IMPL deflate_decompress_default
+#endif
+
+#ifdef arch_select_decompress_func
+/* Declared ahead of its definition so decompress_impl can start out pointing at it. */
+static enum libdeflate_result
+dispatch_decomp(struct libdeflate_decompressor *d,
+		const void *in, size_t in_nbytes,
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
+
+/*
+ * Implementation invoked by the public entry point.  Initially the
+ * dispatcher; overwritten with the selected routine on the first call.
+ */
+static volatile decompress_func_t decompress_impl = dispatch_decomp;
+
+/*
+ * First-call dispatcher: ask the architecture hook for an implementation,
+ * fall back to DEFAULT_IMPL when it returns NULL, record the choice in
+ * decompress_impl, then forward this call to the chosen routine.
+ */
+static enum libdeflate_result
+dispatch_decomp(struct libdeflate_decompressor *d,
+		const void *in, size_t in_nbytes,
+		void *out, size_t out_nbytes_avail,
+		size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
+{
+	const decompress_func_t candidate = arch_select_decompress_func();
+	const decompress_func_t impl =
+		(candidate != NULL) ? candidate : DEFAULT_IMPL;
+
+	decompress_impl = impl;
+	return impl(d, in, in_nbytes, out, out_nbytes_avail,
+		    actual_in_nbytes_ret, actual_out_nbytes_ret);
+}
+#else
+/* The best implementation is statically known, so call it directly. */
+# define decompress_impl DEFAULT_IMPL
+#endif
+
+/*
+ * Main DEFLATE decompression entry point; the API contract is documented in
+ * libdeflate.h.
+ *
+ * The decoding logic itself comes from decompress_template.h.  This wrapper
+ * only forwards the call through decompress_impl, which resolves to the
+ * implementation appropriate for the CPU features.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d,
+				 const void *in, size_t in_nbytes,
+				 void *out, size_t out_nbytes_avail,
+				 size_t *actual_in_nbytes_ret,
+				 size_t *actual_out_nbytes_ret)
+{
+	enum libdeflate_result status;
+
+	status = decompress_impl(d, in, in_nbytes, out, out_nbytes_avail,
+				 actual_in_nbytes_ret, actual_out_nbytes_ret);
+	return status;
+}
+
+/*
+ * Convenience form of libdeflate_deflate_decompress_ex() for callers that do
+ * not need the consumed-input byte count: NULL is passed for
+ * actual_in_nbytes_ret and every other argument is forwarded unchanged.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress(struct libdeflate_decompressor *d,
+			      const void *in, size_t in_nbytes,
+			      void *out, size_t out_nbytes_avail,
+			      size_t *actual_out_nbytes_ret)
+{
+	size_t *const no_in_count = NULL;
+
+	return libdeflate_deflate_decompress_ex(d, in, in_nbytes,
+						out, out_nbytes_avail,
+						no_in_count,
+						actual_out_nbytes_ret);
+}
+
+/*
+ * Allocate a zero-initialized decompressor using the allocator callbacks in
+ * 'options'; a callback left NULL falls back to the library default.
+ * Returns NULL when options->sizeof_options does not match this build's
+ * struct size, or when the allocation fails.
+ */
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options)
+{
+	struct libdeflate_decompressor *d;
+
+	/*
+	 * Only one layout of libdeflate_options is understood.  If fields are
+	 * ever added to it, this check has to learn about both the old and
+	 * the new struct sizes.
+	 */
+	if (options->sizeof_options != sizeof(*options))
+		return NULL;
+
+	if (options->malloc_func)
+		d = (struct libdeflate_decompressor *)
+			options->malloc_func(sizeof(*d));
+	else
+		d = (struct libdeflate_decompressor *)
+			libdeflate_default_malloc_func(sizeof(*d));
+	if (d == NULL)
+		return NULL;
+
+	/*
+	 * Strictly, only some of the decompressor needs initializing here:
+	 *
+	 * - 'static_codes_loaded' has to start out false.
+	 *
+	 * - The first half of the main portion of each decode table needs
+	 *   some defined value so that table expansion in
+	 *   build_decode_table() never reads uninitialized memory.  (That
+	 *   mostly silences tools such as valgrind; build_decode_table()
+	 *   ends up writing every entry anyway.)
+	 *
+	 * - 'free_func' has to be set.
+	 *
+	 * Zeroing the entire object is simply the easiest way to get there.
+	 */
+	memset(d, 0, sizeof(*d));
+	if (options->free_func)
+		d->free_func = options->free_func;
+	else
+		d->free_func = libdeflate_default_free_func;
+	return d;
+}
+
+/*
+ * Allocate a decompressor with default options: only sizeof_options is
+ * filled in, every other option field is zero, and the work is delegated to
+ * libdeflate_alloc_decompressor_ex().
+ */
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor(void)
+{
+	/* The single initializer is sizeof_options; the rest is zeroed. */
+	static const struct libdeflate_options default_opts = {
+		sizeof(struct libdeflate_options),
+	};
+
+	return libdeflate_alloc_decompressor_ex(&default_opts);
+}
+
+/*
+ * Release a decompressor through the free callback stored inside it.
+ * Passing NULL does nothing.
+ */
+LIBDEFLATEAPI void
+libdeflate_free_decompressor(struct libdeflate_decompressor *d)
+{
+	if (d == NULL)
+		return;
+	d->free_func(d);
+}
+
+
+/*
+ * utils.c - utility functions for libdeflate
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef FREESTANDING
+# define malloc NULL
+# define free NULL
+#else
+# include
+#endif
+
+malloc_func_t libdeflate_default_malloc_func = malloc; /* fallback allocator for callers that supply no malloc_func; NULL when FREESTANDING defines malloc as NULL */
+free_func_t libdeflate_default_free_func = free; /* matching fallback deallocator; likewise NULL under FREESTANDING */
+
+/*
+ * Allocate 'size' bytes at an address aligned to 'alignment', using
+ * 'malloc_func' for the underlying allocation.
+ *
+ * The request is padded by sizeof(void *) + alignment - 1 bytes so that an
+ * aligned address can be found inside the raw allocation with room directly
+ * in front of it to stash the raw pointer; libdeflate_aligned_free() reads
+ * that slot back.
+ *
+ * Returns the aligned pointer, or NULL if the padded size does not fit in a
+ * size_t or 'malloc_func' fails.
+ */
+void *
+libdeflate_aligned_malloc(malloc_func_t malloc_func,
+			  size_t alignment, size_t size)
+{
+	const size_t overhead = sizeof(void *) + alignment - 1;
+	const size_t total = overhead + size;
+	void *raw;
+	void *aligned;
+
+	/*
+	 * Unsigned addition wraps silently; a wrapped total would make the
+	 * allocation smaller than the caller asked for, so refuse instead.
+	 */
+	if (total < size)
+		return NULL;
+
+	raw = (*malloc_func)(total);
+	if (raw == NULL)
+		return NULL;
+
+	/* Skip past the bookkeeping slot, then round up to the alignment. */
+	aligned = (void *)ALIGN((uintptr_t)raw + sizeof(void *), alignment);
+	((void **)aligned)[-1] = raw;
+	return aligned;
+}
+
+/*
+ * Free a block obtained from libdeflate_aligned_malloc().  The pointer that
+ * the underlying allocator originally returned is stored in the slot
+ * immediately before 'ptr'; that raw pointer is what gets handed to
+ * 'free_func'.  A NULL 'ptr' is ignored, mirroring free(), instead of
+ * reading the slot in front of address zero.
+ */
+void
+libdeflate_aligned_free(free_func_t free_func, void *ptr)
+{
+	if (ptr == NULL)
+		return;
+	(*free_func)(((void **)ptr)[-1]);
+}
+
+/*
+ * Replace the global default allocation callbacks.  Both pointers are stored
+ * as given, without validation, and are used from then on wherever the
+ * defaults are consulted.
+ */
+LIBDEFLATEAPI void
+libdeflate_set_memory_allocator(malloc_func_t malloc_func,
+				free_func_t free_func)
+{
+	libdeflate_default_free_func = free_func;
+	libdeflate_default_malloc_func = malloc_func;
+}
+
+/*
+ * Implementations of libc functions for freestanding library builds.
+ * Normal library builds don't use these. Not optimized yet; usually the
+ * compiler expands these functions and doesn't actually call them anyway.
+ */
+#ifdef FREESTANDING
+#undef memset
+/*
+ * Freestanding fallback for memset(), declared weak: store the byte value
+ * 'c' into each of the first 'n' bytes at 's' and return 's'.
+ */
+void * __attribute__((weak))
+memset(void *s, int c, size_t n)
+{
+	u8 *dst = s;
+
+	while (n-- > 0)
+		*dst++ = c;
+	return s;
+}
+
+#undef memcpy
+/*
+ * Freestanding fallback for memcpy(), declared weak: copy 'n' bytes from
+ * 'src' to 'dest' in ascending address order and return 'dest'.
+ */
+void * __attribute__((weak))
+memcpy(void *dest, const void *src, size_t n)
+{
+	u8 *out = dest;
+	const u8 *in = src;
+
+	while (n-- > 0)
+		*out++ = *in++;
+	return dest;
+}
+
+#undef memmove
+/*
+ * Freestanding fallback for memmove(), declared weak.  When the destination
+ * lies above the source the bytes are copied from the last one down to the
+ * first; otherwise the copy is delegated to memcpy().  Returns 'dest'.
+ */
+void * __attribute__((weak))
+memmove(void *dest, const void *src, size_t n)
+{
+	u8 *d = dest;
+	const u8 *s = src;
+
+	if (d > s) {
+		/* Walk backwards so overlapping source bytes are read first. */
+		while (n > 0) {
+			n--;
+			d[n] = s[n];
+		}
+		return dest;
+	}
+	return memcpy(d, s, n);
+}
+
+#undef memcmp
+/*
+ * Freestanding fallback for memcmp(), declared weak: compare the first 'n'
+ * bytes of 's1' and 's2' as unsigned bytes.  Returns the difference of the
+ * first mismatching pair, or 0 when all 'n' bytes are equal.
+ */
+int __attribute__((weak))
+memcmp(const void *s1, const void *s2, size_t n)
+{
+	const u8 *lhs = s1;
+	const u8 *rhs = s2;
+
+	for (; n > 0; n--, lhs++, rhs++) {
+		if (*lhs != *rhs)
+			return (int)*lhs - (int)*rhs;
+	}
+	return 0;
+}
+#endif /* FREESTANDING */
+
+#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
+#include
+#include
+void
+libdeflate_assertion_failed(const char *expr, const char *file, int line)
+{
+ fprintf(stderr, "Assertion failed: %s at %s:%d\n", expr, file, line); /* report the failed expression text and its file:line on stderr */
+ abort(); /* then terminate the process; control never returns to the caller */
+}
+#endif /* LIBDEFLATE_ENABLE_ASSERTIONS */
+
+/*
+ * x86/cpu_features.c - feature detection for x86 CPUs
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_DYNAMIC_X86_CPU_FEATURES
+
+/*
+ * With old GCC versions we have to manually save and restore the x86_32 PIC
+ * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
+ */
+#if defined(ARCH_X86_32) && defined(__PIC__)
+# define EBX_CONSTRAINT "=&r"
+#else
+# define EBX_CONSTRAINT "=b"
+#endif
+
+/*
+ * Execute the CPUID instruction for the given leaf (EAX input) and subleaf
+ * (ECX input), storing the resulting EAX, EBX, ECX and EDX values in *a, *b,
+ * *c and *d respectively.
+ *
+ * MSVC builds use the __cpuidex() intrinsic.  Other compilers use inline
+ * assembly.  When operand %1 (the EBX output) is not %ebx itself -- the case
+ * EBX_CONSTRAINT selects for 32-bit PIC builds -- the .ifnc blocks copy %ebx
+ * into %1 before CPUID and exchange the two afterwards, so %ebx ends up with
+ * its original value and %1 receives CPUID's EBX result.
+ */
+static inline void
+cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+#ifdef _MSC_VER
+ int result[4]; /* filled by __cpuidex(); unpacked below as EAX, EBX, ECX, EDX */
+
+ __cpuidex(result, leaf, subleaf);
+ *a = result[0];
+ *b = result[1];
+ *c = result[2];
+ *d = result[3];
+#else
+ __asm__ volatile(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n" /* save %ebx in %1 if they differ */
+ "cpuid \n"
+ ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n" /* restore %ebx, leaving the result in %1 */
+ : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
+ : "a" (leaf), "c" (subleaf));
+#endif
+}
+
+/*
+ * Read the extended control register selected by 'index' and return its
+ * 64-bit value.  MSVC builds use the _xgetbv() intrinsic; other compilers
+ * issue the instruction through inline assembly with 'index' in ECX and
+ * combine the EDX (high half) and EAX (low half) outputs.
+ */
+static inline u64
+read_xcr(u32 index)
+{
+#ifdef _MSC_VER
+ return _xgetbv(index);
+#else
+ u32 d, a; /* receive EDX and EAX from the instruction, respectively */
+
+ /*
+ * Execute the "xgetbv" instruction. Old versions of binutils do not
+ * recognize this instruction, so list the raw bytes instead.
+ *
+ * This must be 'volatile' to prevent this code from being moved out
+ * from under the check for OSXSAVE.
+ */
+ __asm__ volatile(".byte 0x0f, 0x01, 0xd0" :
+ "=d" (d), "=a" (a) : "c" (index));
+
+ return ((u64)d << 32) | a; /* EDX supplies bits 63..32, EAX bits 31..0 */
+#endif
+}
+
+static const struct cpu_feature x86_cpu_feature_table[] = { /* pairs each feature bit with a lowercase name; passed to disable_cpu_features_for_testing() during initialization */
+ {X86_CPU_FEATURE_SSE2, "sse2"},
+ {X86_CPU_FEATURE_PCLMUL, "pclmul"},
+ {X86_CPU_FEATURE_AVX, "avx"},
+ {X86_CPU_FEATURE_AVX2, "avx2"},
+ {X86_CPU_FEATURE_BMI2, "bmi2"},
+};
+
+volatile u32 libdeflate_x86_cpu_features = 0; /* stays 0 until libdeflate_init_x86_cpu_features() stores the detected mask with X86_CPU_FEATURES_KNOWN or'ed in */
+
+/*
+ * Detect the x86 CPU features of interest via CPUID and publish them in
+ * libdeflate_x86_cpu_features.  The stored value always has
+ * X86_CPU_FEATURES_KNOWN or'ed in, even when no feature was detected.
+ */
+void libdeflate_init_x86_cpu_features(void)
+{
+	u32 max_leaf, a, b, c, d;
+	u64 xcr0 = 0;
+	u32 features = 0;
+
+	/* EAX=0: Highest Function Parameter and Manufacturer ID */
+	cpuid(0, 0, &max_leaf, &b, &c, &d);
+
+	if (max_leaf >= 1) {
+		/* EAX=1: Processor Info and Feature Bits */
+		cpuid(1, 0, &a, &b, &c, &d);
+		if (d & (1 << 26))
+			features |= X86_CPU_FEATURE_SSE2;
+		if (c & (1 << 1))
+			features |= X86_CPU_FEATURE_PCLMUL;
+		/* XCR0 is only read when ECX bit 27 (OSXSAVE) is set. */
+		if (c & (1 << 27))
+			xcr0 = read_xcr(0);
+		/* AVX needs its CPUID bit plus XCR0 bits 1 and 2 both set. */
+		if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))
+			features |= X86_CPU_FEATURE_AVX;
+
+		if (max_leaf >= 7) {
+			/* EAX=7, ECX=0: Extended Features */
+			cpuid(7, 0, &a, &b, &c, &d);
+			if ((b & (1 << 5)) && ((xcr0 & 0x6) == 0x6))
+				features |= X86_CPU_FEATURE_AVX2;
+			if (b & (1 << 8))
+				features |= X86_CPU_FEATURE_BMI2;
+		}
+	}
+
+	/* Reached on every path, including CPUs reporting max_leaf < 1. */
+	disable_cpu_features_for_testing(&features, x86_cpu_feature_table,
+					 ARRAY_LEN(x86_cpu_feature_table));
+
+	libdeflate_x86_cpu_features = features | X86_CPU_FEATURES_KNOWN;
+}
+
+#endif /* HAVE_DYNAMIC_X86_CPU_FEATURES */
diff --git a/Source/ThirdParty/OpenFBX/libdeflate.h b/Source/ThirdParty/OpenFBX/libdeflate.h
new file mode 100644
index 000000000..382d895de
--- /dev/null
+++ b/Source/ThirdParty/OpenFBX/libdeflate.h
@@ -0,0 +1,411 @@
+/*
+ * libdeflate.h - public header for libdeflate
+ */
+
+#ifndef LIBDEFLATE_H
+#define LIBDEFLATE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LIBDEFLATE_VERSION_MAJOR 1
+#define LIBDEFLATE_VERSION_MINOR 18
+#define LIBDEFLATE_VERSION_STRING "1.18"
+
+/*
+ * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
+ * __declspec(dllimport) to be used. This should be done when it's easy to do.
+ * Otherwise it's fine to skip it, since it is a very minor performance
+ * optimization that is irrelevant for most use cases of libdeflate.
+ */
+#ifndef LIBDEFLATEAPI
+# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
+# define LIBDEFLATEAPI __declspec(dllimport)
+# else
+# define LIBDEFLATEAPI
+# endif
+#endif
+
+/* ========================================================================== */
+/* Compression */
+/* ========================================================================== */
+
+struct libdeflate_compressor;
+struct libdeflate_options;
+
+/*
+ * libdeflate_alloc_compressor() allocates a new compressor that supports
+ * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression
+ * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 =
+ * medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means
+ * "no compression", specifically "create a valid stream, but only emit
+ * uncompressed blocks" (this will expand the data slightly).
+ *
+ * The return value is a pointer to the new compressor, or NULL if out of memory
+ * or if the compression level is invalid (i.e. outside the range [0, 12]).
+ *
+ * Note: for compression, the sliding window size is defined at compilation time
+ * to 32768, the largest size permissible in the DEFLATE format. It cannot be
+ * changed at runtime.
+ *
+ * A single compressor is not safe to use by multiple threads concurrently.
+ * However, different threads may use different compressors concurrently.
+ */
+LIBDEFLATEAPI struct libdeflate_compressor *
+libdeflate_alloc_compressor(int compression_level);
+
+/*
+ * Like libdeflate_alloc_compressor(), but adds the 'options' argument.
+ */
+LIBDEFLATEAPI struct libdeflate_compressor *
+libdeflate_alloc_compressor_ex(int compression_level,
+ const struct libdeflate_options *options);
+
+/*
+ * libdeflate_deflate_compress() performs raw DEFLATE compression on a buffer of
+ * data. It attempts to compress 'in_nbytes' bytes of data located at 'in' and
+ * write the result to 'out', which has space for 'out_nbytes_avail' bytes. The
+ * return value is the compressed size in bytes, or 0 if the data could not be
+ * compressed to 'out_nbytes_avail' bytes or fewer (but see note below).
+ *
+ * If compression is successful, then the output data is guaranteed to be a
+ * valid DEFLATE stream that decompresses to the input data. No other
+ * guarantees are made about the output data. Notably, different versions of
+ * libdeflate can produce different compressed data for the same uncompressed
+ * data, even at the same compression level. Do ***NOT*** do things like
+ * writing tests that compare compressed data to a golden output, as this can
+ * break when libdeflate is updated. (This property isn't specific to
+ * libdeflate; the same is true for zlib and other compression libraries too.)
+ */
+LIBDEFLATEAPI size_t
+libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail);
+
+/*
+ * libdeflate_deflate_compress_bound() returns a worst-case upper bound on the
+ * number of bytes of compressed data that may be produced by compressing any
+ * buffer of length less than or equal to 'in_nbytes' using
+ * libdeflate_deflate_compress() with the specified compressor. This bound will
+ * necessarily be a number greater than or equal to 'in_nbytes'. It may be an
+ * overestimate of the true upper bound. The return value is guaranteed to be
+ * the same for all invocations with the same compressor and same 'in_nbytes'.
+ *
+ * As a special case, 'compressor' may be NULL. This causes the bound to be
+ * taken across *any* libdeflate_compressor that could ever be allocated with
+ * this build of the library, with any options.
+ *
+ * Note that this function is not necessary in many applications. With
+ * block-based compression, it is usually preferable to separately store the
+ * uncompressed size of each block and to store any blocks that did not compress
+ * to less than their original size uncompressed. In that scenario, there is no
+ * need to know the worst-case compressed size, since the maximum number of
+ * bytes of compressed data that may be used would always be one less than the
+ * input length. You can just pass a buffer of that size to
+ * libdeflate_deflate_compress() and store the data uncompressed if
+ * libdeflate_deflate_compress() returns 0, indicating that the compressed data
+ * did not fit into the provided output buffer.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
+ size_t in_nbytes);
+
+/*
+ * Like libdeflate_deflate_compress(), but uses the zlib wrapper format instead
+ * of raw DEFLATE.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail);
+
+/*
+ * Like libdeflate_deflate_compress_bound(), but assumes the data will be
+ * compressed with libdeflate_zlib_compress() rather than with
+ * libdeflate_deflate_compress().
+ */
+LIBDEFLATEAPI size_t
+libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
+ size_t in_nbytes);
+
+/*
+ * Like libdeflate_deflate_compress(), but uses the gzip wrapper format instead
+ * of raw DEFLATE.
+ */
+LIBDEFLATEAPI size_t
+libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail);
+
+/*
+ * Like libdeflate_deflate_compress_bound(), but assumes the data will be
+ * compressed with libdeflate_gzip_compress() rather than with
+ * libdeflate_deflate_compress().
+ */
+LIBDEFLATEAPI size_t
+libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
+ size_t in_nbytes);
+
+/*
+ * libdeflate_free_compressor() frees a compressor that was allocated with
+ * libdeflate_alloc_compressor(). If a NULL pointer is passed in, no action is
+ * taken.
+ */
+LIBDEFLATEAPI void
+libdeflate_free_compressor(struct libdeflate_compressor *compressor);
+
+/* ========================================================================== */
+/* Decompression */
+/* ========================================================================== */
+
+struct libdeflate_decompressor;
+struct libdeflate_options;
+
+/*
+ * libdeflate_alloc_decompressor() allocates a new decompressor that can be used
+ * for DEFLATE, zlib, and gzip decompression. The return value is a pointer to
+ * the new decompressor, or NULL if out of memory.
+ *
+ * This function takes no parameters, and the returned decompressor is valid for
+ * decompressing data that was compressed at any compression level and with any
+ * sliding window size.
+ *
+ * A single decompressor is not safe to use by multiple threads concurrently.
+ * However, different threads may use different decompressors concurrently.
+ */
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor(void);
+
+/*
+ * Like libdeflate_alloc_decompressor(), but adds the 'options' argument.
+ */
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
+
+/*
+ * Result code returned by libdeflate_deflate_decompress(),
+ * libdeflate_zlib_decompress(), libdeflate_gzip_decompress(), and their "_ex" variants.
+ */
+enum libdeflate_result {
+ /* Decompression was successful. */
+ LIBDEFLATE_SUCCESS = 0,
+
+ /* Decompression failed because the compressed data was invalid,
+ * corrupt, or otherwise unsupported. */
+ LIBDEFLATE_BAD_DATA = 1,
+
+ /* A NULL 'actual_out_nbytes_ret' was provided, but the data would have
+ * decompressed to fewer than 'out_nbytes_avail' bytes. */
+ LIBDEFLATE_SHORT_OUTPUT = 2,
+
+ /* The data would have decompressed to more than 'out_nbytes_avail'
+ * bytes. */
+ LIBDEFLATE_INSUFFICIENT_SPACE = 3,
+};
+
+/*
+ * libdeflate_deflate_decompress() decompresses a DEFLATE stream from the buffer
+ * 'in' with compressed size up to 'in_nbytes' bytes. The uncompressed data is
+ * written to 'out', a buffer with size 'out_nbytes_avail' bytes. If
+ * decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned. Otherwise,
+ * a nonzero result code such as LIBDEFLATE_BAD_DATA is returned, and the
+ * contents of the output buffer are undefined.
+ *
+ * Decompression stops at the end of the DEFLATE stream (as indicated by the
+ * BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes.
+ *
+ * libdeflate_deflate_decompress() can be used in cases where the actual
+ * uncompressed size is known (recommended) or unknown (not recommended):
+ *
+ * - If the actual uncompressed size is known, then pass the actual
+ * uncompressed size as 'out_nbytes_avail' and pass NULL for
+ * 'actual_out_nbytes_ret'. This makes libdeflate_deflate_decompress() fail
+ * with LIBDEFLATE_SHORT_OUTPUT if the data decompressed to fewer than the
+ * specified number of bytes.
+ *
+ * - If the actual uncompressed size is unknown, then provide a non-NULL
+ * 'actual_out_nbytes_ret' and provide a buffer with some size
+ * 'out_nbytes_avail' that you think is large enough to hold all the
+ * uncompressed data. In this case, if the data decompresses to less than
+ * or equal to 'out_nbytes_avail' bytes, then
+ * libdeflate_deflate_decompress() will write the actual uncompressed size
+ * to *actual_out_nbytes_ret and return 0 (LIBDEFLATE_SUCCESS). Otherwise,
+ * it will return LIBDEFLATE_INSUFFICIENT_SPACE if the provided buffer was
+ * not large enough but no other problems were encountered, or another
+ * nonzero result code if decompression failed for another reason.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret'
+ * argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL,
+ * then the actual compressed size of the DEFLATE stream (aligned to the next
+ * byte boundary) is written to *actual_in_nbytes_ret.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_deflate_decompress(), but assumes the zlib wrapper format
+ * instead of raw DEFLATE.
+ *
+ * Decompression will stop at the end of the zlib stream, even if it is shorter
+ * than 'in_nbytes'. If you need to know exactly where the zlib stream ended,
+ * use libdeflate_zlib_decompress_ex().
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_zlib_decompress(), but adds the 'actual_in_nbytes_ret'
+ * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
+ * succeeds (indicating that the first zlib-compressed stream in the input
+ * buffer was decompressed), then the actual number of input bytes consumed is
+ * written to *actual_in_nbytes_ret.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format
+ * instead of raw DEFLATE.
+ *
+ * If multiple gzip-compressed members are concatenated, then only the first
+ * will be decompressed. Use libdeflate_gzip_decompress_ex() if you need
+ * multi-member support.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret'
+ * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression
+ * succeeds (indicating that the first gzip-compressed member in the input
+ * buffer was decompressed), then the actual number of input bytes consumed is
+ * written to *actual_in_nbytes_ret.
+ */
+LIBDEFLATEAPI enum libdeflate_result
+libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
+ const void *in, size_t in_nbytes,
+ void *out, size_t out_nbytes_avail,
+ size_t *actual_in_nbytes_ret,
+ size_t *actual_out_nbytes_ret);
+
+/*
+ * libdeflate_free_decompressor() frees a decompressor that was allocated with
+ * libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action
+ * is taken.
+ */
+LIBDEFLATEAPI void
+libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
+
+/* ========================================================================== */
+/* Checksums */
+/* ========================================================================== */
+
+/*
+ * libdeflate_adler32() updates a running Adler-32 checksum with 'len' bytes of
+ * data and returns the updated checksum. When starting a new checksum, the
+ * required initial value for 'adler' is 1. This value is also returned when
+ * 'buffer' is specified as NULL.
+ */
+LIBDEFLATEAPI uint32_t
+libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
+
+
+/*
+ * libdeflate_crc32() updates a running CRC-32 checksum with 'len' bytes of data
+ * and returns the updated checksum. When starting a new checksum, the required
+ * initial value for 'crc' is 0. This value is also returned when 'buffer' is
+ * specified as NULL.
+ */
+LIBDEFLATEAPI uint32_t
+libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
+
+/* ========================================================================== */
+/* Custom memory allocator */
+/* ========================================================================== */
+
+/*
+ * Install a custom memory allocator which libdeflate will use for all memory
+ * allocations by default. 'malloc_func' is a function that must behave like
+ * malloc(), and 'free_func' is a function that must behave like free().
+ *
+ * The per-(de)compressor custom memory allocator that can be specified in
+ * 'struct libdeflate_options' takes priority over this.
+ *
+ * This doesn't affect the free() function that will be used to free
+ * (de)compressors that were already in existence when this is called.
+ */
+LIBDEFLATEAPI void
+libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
+ void (*free_func)(void *));
+
+/*
+ * Advanced options. This is the options structure that
+ * libdeflate_alloc_compressor_ex() and libdeflate_alloc_decompressor_ex()
+ * require. Most users won't need this and should just use the non-"_ex"
+ * functions instead. If you do need this, it should be initialized like this:
+ *
+ * struct libdeflate_options options;
+ *
+ * memset(&options, 0, sizeof(options));
+ * options.sizeof_options = sizeof(options);
+ * // Then set the fields that you need to override the defaults for.
+ */
+struct libdeflate_options {
+
+ /*
+ * This field must be set to the struct size. This field exists for
+ * extensibility, so that fields can be appended to this struct in
+ * future versions of libdeflate while still supporting old binaries.
+ */
+ size_t sizeof_options; /* i.e. sizeof(struct libdeflate_options) */
+
+ /*
+ * An optional custom memory allocator to use for this (de)compressor.
+ * 'malloc_func' must be a function that behaves like malloc(), and
+ * 'free_func' must be a function that behaves like free().
+ *
+ * This is useful in cases where a process might have multiple users of
+ * libdeflate who want to use different memory allocators. For example,
+ * a library might want to use libdeflate with a custom memory allocator
+ * without interfering with user code that might use libdeflate too.
+ *
+ * This takes priority over the "global" memory allocator (which by
+ * default is malloc() and free(), but can be changed by
+ * libdeflate_set_memory_allocator()). Moreover, libdeflate will never
+ * call the "global" memory allocator if a per-(de)compressor custom
+ * allocator is always given.
+ */
+ void *(*malloc_func)(size_t); /* optional; must behave like malloc() */
+ void (*free_func)(void *); /* optional; must behave like free() */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBDEFLATE_H */
diff --git a/Source/ThirdParty/OpenFBX/ofbx.cpp b/Source/ThirdParty/OpenFBX/ofbx.cpp
index e60211e3d..67f34bfeb 100644
--- a/Source/ThirdParty/OpenFBX/ofbx.cpp
+++ b/Source/ThirdParty/OpenFBX/ofbx.cpp
@@ -1,5 +1,5 @@
#include "ofbx.h"
-#include "miniz.h"
+#include "libdeflate.h"
#include
#include
#include
@@ -8,11 +8,39 @@
#include
#include
#include
+#include
+#include
+#include
+#if __cplusplus >= 202002L && defined(__cpp_lib_bit_cast)
+#include <bit> // for std::bit_cast (C++20 and later)
+#endif
+#include