From edf3be87892641eceaf83e779e659e6199c4bfb6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Love=20H=C3=B6rnquist=20=C3=85strand?= Date: Sun, 13 Apr 2008 15:27:08 +0200 Subject: [PATCH] First version, almost works --- LICENSE | 674 +++++++++++++++++++ README | 21 + doc/git-fast-import.webarchive | 1421 ++++++++++++++++++++++++++++++++++++++++ doc/svn-dump.txt | 255 +++++++ git2svn | 355 ++++++++++ run-tests.sh | 3 + 6 files changed, 2729 insertions(+) create mode 100644 LICENSE create mode 100644 README create mode 100644 doc/git-fast-import.webarchive create mode 100644 doc/svn-dump.txt create mode 100755 git2svn create mode 100644 run-tests.sh diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..20d40b6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
\ No newline at end of file diff --git a/README b/README new file mode 100644 index 0000000..978849d --- /dev/null +++ b/README @@ -0,0 +1,21 @@ + +git2svn + +A tool to convert git branches to svn ditto. One use is to make it +easier for people to migrate from svn and still use old svn tools (like +fisheye). + +Usage: + + git2svn path-to-dir-with-.git-file path-to-svnrepro + +Example: + + ./git2svn /Users/lha/src/heimdal/git-trunk repro + + +Missing: + + Command argument parser for branch and basedir specification + (backend code is already there). + diff --git a/doc/git-fast-import.webarchive b/doc/git-fast-import.webarchive new file mode 100644 index 0000000..f46d748 --- /dev/null +++ b/doc/git-fast-import.webarchive @@ -0,0 +1,1421 @@ +bplist00Ñ_WebMainResourceÕ + ^WebResourceURL_WebResourceTextEncodingName_WebResourceMIMEType_WebResourceData_WebResourceFrameName_Dhttp://www.kernel.org/pub/software/scm/git/docs/git-fast-import.htmlUUTF-8Ytext/htmlOÛ0 + + + +git-fast-import(1) + + +

SYNOPSIS

+
+

frontend | git-fast-import [options]

+
+

DESCRIPTION

+
+

This program is usually not what the end user wants to run directly. +Most end users want to use one of the existing frontend programs, +which parses a specific type of foreign source and feeds the contents +stored there to git-fast-import.

+

fast-import reads a mixed command/data stream from standard input and +writes one or more packfiles directly into the current repository. +When EOF is received on standard input, fast import writes out +updated branch and tag refs, fully updating the current repository +with the newly imported data.

+

The fast-import backend itself can import into an empty repository (one that +has already been initialized by git-init(1)) or incrementally +update an existing populated repository. Whether or not incremental +imports are supported from a particular foreign source depends on +the frontend program in use.

+
+

OPTIONS

+
+
+
+--date-format=<fmt> +
+
+

+ Specify the type of dates the frontend will supply to + fast-import within author, committer and tagger commands. + See “Date Formats” below for details about which formats + are supported, and their syntax. +

+
+
+--force +
+
+

+ Force updating modified existing branches, even if doing + so would cause commits to be lost (as the new commit does + not contain the old commit). +

+
+
+--max-pack-size=<n> +
+
+

+ Maximum size of each output packfile, expressed in MiB. + The default is 4096 (4 GiB) as that is the maximum allowed + packfile size (due to file format limitations). Some + importers may wish to lower this, such as to ensure the + resulting packfiles fit on CDs. +

+
+
+--depth=<n> +
+
+

+ Maximum delta depth, for blob and tree deltification. + Default is 10. +

+
+
+--active-branches=<n> +
+
+

+ Maximum number of branches to maintain active at once. + See “Memory Utilization” below for details. Default is 5. +

+
+
+--export-marks=<file> +
+
+

+ Dumps the internal marks table to <file> when complete. + Marks are written one per line as :markid SHA-1. + Frontends can use this file to validate imports after they + have been completed, or to save the marks table across + incremental runs. As <file> is only opened and truncated + at checkpoint (or completion) the same path can also be + safely given to --import-marks. +

+
+
+--import-marks=<file> +
+
+

+ Before processing any input, load the marks specified in + <file>. The input file must exist, must be readable, and + must use the same format as produced by --export-marks. + Multiple options may be supplied to import more than one + set of marks. If a mark is defined to different values, + the last file wins. +

+
+
+--export-pack-edges=<file> +
+
+

+ After creating a packfile, print a line of data to + <file> listing the filename of the packfile and the last + commit on each branch that was written to that packfile. + This information may be useful after importing projects + whose total object set exceeds the 4 GiB packfile limit, + as these commits can be used as edge points during calls + to git-pack-objects(1). +

+
+
+--quiet +
+
+

+ Disable all non-fatal output, making fast-import silent when it + is successful. This option disables the output shown by + --stats. +

+
+
+--stats +
+
+

+ Display some basic statistics about the objects fast-import has + created, the packfiles they were stored into, and the + memory used by fast-import during this run. Showing this output + is currently the default, but can be disabled with --quiet. +

+
+
+
+

Performance

+
+

The design of fast-import allows it to import large projects in a minimum +amount of memory usage and processing time. Assuming the frontend +is able to keep up with fast-import and feed it a constant stream of data, +import times for projects holding 10+ years of history and containing +100,000+ individual commits are generally completed in just 1-2 +hours on quite modest (~$2,000 USD) hardware.

+

Most bottlenecks appear to be in foreign source data access (the +source just cannot extract revisions fast enough) or disk IO (fast-import +writes as fast as the disk will take the data). Imports will run +faster if the source data is stored on a different drive than the +destination Git repository (due to less IO contention).

+
+

Development Cost

+
+

A typical frontend for fast-import tends to weigh in at approximately 200 +lines of Perl/Python/Ruby code. Most developers have been able to +create working importers in just a couple of hours, even though it +is their first exposure to fast-import, and sometimes even to Git. This is +an ideal situation, given that most conversion tools are throw-away +(use once, and never look back).

+
+

Parallel Operation

+
+

Like git-push or git-fetch, imports handled by fast-import are safe to +run alongside parallel git repack -a -d or git gc invocations, +or any other Git operation (including git prune, as loose objects +are never used by fast-import).

+

fast-import does not lock the branch or tag refs it is actively importing. +After the import, during its ref update phase, fast-import tests each +existing branch ref to verify the update will be a fast-forward +update (the commit stored in the ref is contained in the new +history of the commit to be written). If the update is not a +fast-forward update, fast-import will skip updating that ref and instead +print a warning message. fast-import will always attempt to update all +branch refs, and does not stop on the first failure.

+

Branch updates can be forced with --force, but it's recommended that +this only be used on an otherwise quiet repository. Using --force +is not necessary for an initial import into an empty repository.

+
+

Technical Discussion

+
+

fast-import tracks a set of branches in memory. Any branch can be created +or modified at any point during the import process by sending a +commit command on the input stream. This design allows a frontend +program to process an unlimited number of branches simultaneously, +generating commits in the order they are available from the source +data. It also simplifies the frontend programs considerably.

+

fast-import does not use or alter the current working directory, or any +file within it. (It does however update the current Git repository, +as referenced by GIT_DIR.) Therefore an import frontend may use +the working directory for its own purposes, such as extracting file +revisions from the foreign source. This ignorance of the working +directory also allows fast-import to run very quickly, as it does not +need to perform any costly file update operations when switching +between branches.

+
+

Input Format

+
+

With the exception of raw file data (which Git does not interpret) +the fast-import input format is text (ASCII) based. This text based +format simplifies development and debugging of frontend programs, +especially when a higher level language such as Perl, Python or +Ruby is being used.

+

fast-import is very strict about its input. Where we say SP below we mean +exactly one space. Likewise LF means one (and only one) linefeed. +Supplying additional whitespace characters will cause unexpected +results, such as branch names or file names with leading or trailing +spaces in their name, or early termination of fast-import when it encounters +unexpected input.

+

Stream Comments

+

To aid in debugging frontends fast-import ignores any line that +begins with # (ASCII pound/hash) up to and including the line +ending LF. A comment line may contain any sequence of bytes +that does not contain an LF and therefore may be used to include +any detailed debugging information that might be specific to the +frontend and useful when inspecting a fast-import data stream.

+

Date Formats

+

The following date formats are supported. A frontend should select +the format it will use for this import by passing the format name +in the --date-format=<fmt> command line option.

+
+
+raw +
+
+

+ This is the Git native format and is <time> SP <offutc>. + It is also fast-import's default format, if --date-format was + not specified. +

+

The time of the event is specified by <time> as the number of +seconds since the UNIX epoch (midnight, Jan 1, 1970, UTC) and is +written as an ASCII decimal integer.

+

The local offset is specified by <offutc> as a positive or negative +offset from UTC. For example EST (which is 5 hours behind UTC) +would be expressed in <offutc> by “-0500” while UTC is “+0000”. +The local offset does not affect <time>; it is used only as an +advisement to help formatting routines display the timestamp.

+

If the local offset is not available in the source material, use +“+0000”, or the most common local offset. For example many +organizations have a CVS repository which has only ever been accessed +by users who are located in the same location and timezone. In this +case a reasonable offset from UTC could be assumed.

+

Unlike the rfc2822 format, this format is very strict. Any +variation in formatting will cause fast-import to reject the value.

+
+
+rfc2822 +
+
+

+ This is the standard email format as described by RFC 2822. +

+

An example value is “Tue Feb 6 11:22:18 2007 -0500”. The Git +parser is accurate, but a little on the lenient side. It is the +same parser used by git-am(1) when applying patches +received from email.

+

Some malformed strings may be accepted as valid dates. In some of +these cases Git will still be able to obtain the correct date from +the malformed string. There are also some types of malformed +strings which Git will parse wrong, and yet consider valid. +Seriously malformed strings will be rejected.

+

Unlike the raw format above, the timezone/UTC offset information +contained in an RFC 2822 date string is used to adjust the date +value to UTC prior to storage. Therefore it is important that +this information be as accurate as possible.

+

If the source material uses RFC 2822 style dates, +the frontend should let fast-import handle the parsing and conversion +(rather than attempting to do it itself) as the Git parser has +been well tested in the wild.

+

Frontends should prefer the raw format if the source material +already uses UNIX-epoch format, can be coaxed to give dates in that +format, or its format is easily convertible to it, as there is no +ambiguity in parsing.

+
+
+now +
+
+

+ Always use the current time and timezone. The literal + now must always be supplied for <when>. +

+

This is a toy format. The current time and timezone of this system +is always copied into the identity string at the time it is being +created by fast-import. There is no way to specify a different time or +timezone.

+

This particular format is supplied as it's short to implement and +may be useful to a process that wants to create a new commit +right now, without needing to use a working directory or +git-update-index(1).

+

If separate author and committer commands are used in a commit +the timestamps may not match, as the system clock will be polled +twice (once for each command). The only way to ensure that both +author and committer identity information has the same timestamp +is to omit author (thus copying from committer) or to use a +date format other than now.

+
+
+

Commands

+

fast-import accepts several commands to update the current repository +and control the current import process. More detailed discussion +(with examples) of each command follows later.

+
+
+commit +
+
+

+ Creates a new branch or updates an existing branch by + creating a new commit and updating the branch to point at + the newly created commit. +

+
+
+tag +
+
+

+ Creates an annotated tag object from an existing commit or + branch. Lightweight tags are not supported by this command, + as they are not recommended for recording meaningful points + in time. +

+
+
+reset +
+
+

+ Reset an existing branch (or a new branch) to a specific + revision. This command must be used to change a branch to + a specific revision without making a commit on it. +

+
+
+blob +
+
+

+ Convert raw file data into a blob, for future use in a + commit command. This command is optional and is not + needed to perform an import. +

+
+
+checkpoint +
+
+

+ Forces fast-import to close the current packfile, generate its + unique SHA-1 checksum and index, and start a new packfile. + This command is optional and is not needed to perform + an import. +

+
+
+progress +
+
+

+ Causes fast-import to echo the entire line to its own + standard output. This command is optional and is not needed + to perform an import. +

+
+
+

commit

+

Create or update a branch with a new commit, recording one logical +change to the project.

+
+
+
        'commit' SP <ref> LF
+        mark?
+        ('author' SP <name> SP LT <email> GT SP <when> LF)?
+        'committer' SP <name> SP LT <email> GT SP <when> LF
+        data
+        ('from' SP <committish> LF)?
+        ('merge' SP <committish> LF)?
+        (filemodify | filedelete | filecopy | filerename | filedeleteall)*
+        LF?
+
+

where <ref> is the name of the branch to make the commit on. +Typically branch names are prefixed with refs/heads/ in +Git, so importing the CVS branch symbol RELENG-1_0 would use +refs/heads/RELENG-1_0 for the value of <ref>. The value of +<ref> must be a valid refname in Git. As LF is not valid in +a Git refname, no quoting or escaping syntax is supported here.

+

A mark command may optionally appear, requesting fast-import to save a +reference to the newly created commit for future use by the frontend +(see below for format). It is very common for frontends to mark +every commit they create, thereby allowing future branch creation +from any imported commit.

+

The data command following committer must supply the commit +message (see below for data command syntax). To import an empty +commit message use a 0 length data. Commit messages are free-form +and are not interpreted by Git. Currently they must be encoded in +UTF-8, as fast-import does not permit other encodings to be specified.

+

Zero or more filemodify, filedelete, filecopy, filerename +and filedeleteall commands +may be included to update the contents of the branch prior to +creating the commit. These commands may be supplied in any order. +However it is recommended that a filedeleteall command precede +all filemodify, filecopy and filerename commands in the same +commit, as filedeleteall +wipes the branch clean (see below).

+

The LF after the command is optional (it used to be required).

+

author

+

An author command may optionally appear, if the author information +might differ from the committer information. If author is omitted +then fast-import will automatically use the committer's information for +the author portion of the commit. See below for a description of +the fields in author, as they are identical to committer.

+

committer

+

The committer command indicates who made this commit, and when +they made it.

+

Here <name> is the person's display name (for example +“Com M Itter”) and <email> is the person's email address +(“cm@example.com”). LT and GT are the literal less-than (\x3c) +and greater-than (\x3e) symbols. These are required to delimit +the email address from the other fields in the line. Note that +<name> is free-form and may contain any sequence of bytes, except +LT and LF. It is typically UTF-8 encoded.

+

The time of the change is specified by <when> using the date format +that was selected by the --date-format=<fmt> command line option. +See “Date Formats” above for the set of supported formats, and +their syntax.

+

from

+

The from command is used to specify the commit to initialize +this branch from. This revision will be the first ancestor of the +new commit.

+

Omitting the from command in the first commit of a new branch +will cause fast-import to create that commit with no ancestor. This +tends to be desired only for the initial commit of a project. +If the frontend creates all files from scratch when making a new +branch, a merge command may be used instead of from to start +the commit with an empty tree. +Omitting the from command on existing branches is usually desired, +as the current commit on that branch is automatically assumed to +be the first ancestor of the new commit.

+

As LF is not valid in a Git refname or SHA-1 expression, no +quoting or escaping syntax is supported within <committish>.

+

Here <committish> is any of the following:

+
    +
  • +

    +The name of an existing branch already in fast-import's internal branch + table. If fast-import doesn't know the name, it's treated as a SHA-1 + expression. +

    +
  • +
  • +

    +A mark reference, :<idnum>, where <idnum> is the mark number. +

    +

    The reason fast-import uses : to denote a mark reference is this character +is not legal in a Git branch name. The leading : makes it easy +to distinguish between the mark 42 (:42) and the branch 42 (42 +or refs/heads/42), or an abbreviated SHA-1 which happened to +consist only of base-10 digits.

    +

    Marks must be declared (via mark) before they can be used.

    +
  • +
  • +

    +A complete 40 byte or abbreviated commit SHA-1 in hex. +

    +
  • +
  • +

    +Any valid Git SHA-1 expression that resolves to a commit. See + “SPECIFYING REVISIONS” in git-rev-parse(1) for details. +

    +
  • +
+

The special case of restarting an incremental import from the +current branch value should be written as:

+
+
+
        from refs/heads/branch^0
+
+

The ^0 suffix is necessary as fast-import does not permit a branch to +start from itself, and the branch is created in memory before the +from command is even read from the input. Adding ^0 will force +fast-import to resolve the commit through Git's revision parsing library, +rather than its internal branch table, thereby loading in the +existing value of the branch.

+

merge

+

Includes one additional ancestor commit. If the from command is +omitted when creating a new branch, the first merge commit will be +the first ancestor of the current commit, and the branch will start +out with no files. An unlimited number of merge commands per +commit are permitted by fast-import, thereby establishing an n-way merge. +However Git's other tools never create commits with more than 15 +additional ancestors (forming a 16-way merge). For this reason +it is suggested that frontends do not use more than 15 merge +commands per commit; 16, if starting a new, empty branch.

+

Here <committish> is any of the commit specification expressions +also accepted by from (see above).

+

filemodify

+

Included in a commit command to add a new file or change the +content of an existing file. This command has two different means +of specifying the content of the file.

+
+
+External data format +
+
+

+ The data content for the file was already supplied by a prior + blob command. The frontend just needs to connect it. +

+
+
+
        'M' SP <mode> SP <dataref> SP <path> LF
+
+

Here <dataref> can be either a mark reference (:<idnum>) +set by a prior blob command, or a full 40-byte SHA-1 of an +existing Git blob object.

+
+
+Inline data format +
+
+

+ The data content for the file has not been supplied yet. + The frontend wants to supply it as part of this modify + command. +

+
+
+
        'M' SP <mode> SP 'inline' SP <path> LF
+        data
+
+

See below for a detailed description of the data command.

+
+
+

In both formats <mode> is the type of file entry, specified +in octal. Git only supports the following modes:

+
    +
  • +

    +100644 or 644: A normal (not-executable) file. The majority + of files in most projects use this mode. If in doubt, this is + what you want. +

    +
  • +
  • +

    +100755 or 755: A normal, but executable, file. +

    +
  • +
  • +

    +120000: A symlink, the content of the file will be the link target. +

    +
  • +
+

In both formats <path> is the complete path of the file to be added +(if not already existing) or modified (if already existing).

+

A <path> string must use UNIX-style directory separators (forward +slash /), may contain any byte other than LF, and must not +start with double quote (").

+

If an LF or double quote must be encoded into <path> shell-style +quoting should be used, e.g. "path/with\n and \" in it".

+

The value of <path> must be in canonical form. That is it must not:

+
    +
  • +

    +contain an empty directory component (e.g. foo//bar is invalid), +

    +
  • +
  • +

    +end with a directory separator (e.g. foo/ is invalid), +

    +
  • +
  • +

    +start with a directory separator (e.g. /foo is invalid), +

    +
  • +
  • +

    +contain the special component . or .. (e.g. foo/./bar and + foo/../bar are invalid). +

    +
  • +
+

It is recommended that <path> always be encoded using UTF-8.

+

filedelete

+

Included in a commit command to remove a file or recursively +delete an entire directory from the branch. If the file or directory +removal makes its parent directory empty, the parent directory will +be automatically removed too. This cascades up the tree until the +first non-empty directory or the root is reached.

+
+
+
        'D' SP <path> LF
+
+

here <path> is the complete path of the file or subdirectory to +be removed from the branch. +See filemodify above for a detailed description of <path>.

+

filecopy

+

Recursively copies an existing file or subdirectory to a different +location within the branch. The existing file or directory must +exist. If the destination exists it will be completely replaced +by the content copied from the source.

+
+
+
        'C' SP <path> SP <path> LF
+
+

here the first <path> is the source location and the second +<path> is the destination. See filemodify above for a detailed +description of what <path> may look like. To use a source path +that contains SP the path must be quoted.

+

A filecopy command takes effect immediately. Once the source +location has been copied to the destination any future commands +applied to the source location will not impact the destination of +the copy.

+

filerename

+

Renames an existing file or subdirectory to a different location +within the branch. The existing file or directory must exist. If +the destination exists it will be replaced by the source directory.

+
+
+
        'R' SP <path> SP <path> LF
+
+

here the first <path> is the source location and the second +<path> is the destination. See filemodify above for a detailed +description of what <path> may look like. To use a source path +that contains SP the path must be quoted.

+

A filerename command takes effect immediately. Once the source +location has been renamed to the destination any future commands +applied to the source location will create new files there and not +impact the destination of the rename.

+

Note that a filerename is the same as a filecopy followed by a +filedelete of the source location. There is a slight performance +advantage to using filerename, but the advantage is so small +that it is never worth trying to convert a delete/add pair in +source material into a rename for fast-import. This filerename +command is provided just to simplify frontends that already have +rename information and don't want to bother with decomposing it into a +filecopy followed by a filedelete.

+

filedeleteall

+

Included in a commit command to remove all files (and also all +directories) from the branch. This command resets the internal +branch structure to have no files in it, allowing the frontend +to subsequently add all interesting files from scratch.

+
+
+
        'deleteall' LF
+
+

This command is extremely useful if the frontend does not know +(or does not care to know) what files are currently on the branch, +and therefore cannot generate the proper filedelete commands to +update the content.

+

Issuing a filedeleteall followed by the needed filemodify +commands to set the correct content will produce the same results +as sending only the needed filemodify and filedelete commands. +The filedeleteall approach may however require fast-import to use slightly +more memory per active branch (less than 1 MiB for even most large +projects); so frontends that can easily obtain only the affected +paths for a commit are encouraged to do so.

+

mark

+

Arranges for fast-import to save a reference to the current object, allowing +the frontend to recall this object at a future point in time, without +knowing its SHA-1. Here the current object is the object creation +command the mark command appears within. This can be commit, +tag, and blob, but commit is the most common usage.

+
+
+
        'mark' SP ':' <idnum> LF
+
+

where <idnum> is the number assigned by the frontend to this mark. +The value of <idnum> is expressed as an ASCII decimal integer. +The value 0 is reserved and cannot be used as +a mark. Only values greater than or equal to 1 may be used as marks.

+

New marks are created automatically. Existing marks can be moved +to another object simply by reusing the same <idnum> in another +mark command.

+

tag

+

Creates an annotated tag referring to a specific commit. To create +lightweight (non-annotated) tags see the reset command below.

+
+
+
        'tag' SP <name> LF
+        'from' SP <committish> LF
+        'tagger' SP <name> SP LT <email> GT SP <when> LF
+        data
+
+

where <name> is the name of the tag to create.

+

Tag names are automatically prefixed with refs/tags/ when stored +in Git, so importing the CVS branch symbol RELENG-1_0-FINAL would +use just RELENG-1_0-FINAL for <name>, and fast-import will write the +corresponding ref as refs/tags/RELENG-1_0-FINAL.

+

The value of <name> must be a valid refname in Git and therefore +may contain forward slashes. As LF is not valid in a Git refname, +no quoting or escaping syntax is supported here.

+

The from command is the same as in the commit command; see +above for details.

+

The tagger command uses the same format as committer within +commit; again see above for details.

+

The data command following tagger must supply the annotated tag +message (see below for data command syntax). To import an empty +tag message use a 0 length data. Tag messages are free-form and are +not interpreted by Git. Currently they must be encoded in UTF-8, +as fast-import does not permit other encodings to be specified.

+

Signing annotated tags during import from within fast-import is not +supported. Trying to include your own PGP/GPG signature is not +recommended, as the frontend does not (easily) have access to the +complete set of bytes which normally goes into such a signature. +If signing is required, create lightweight tags from within fast-import with +reset, then create the annotated versions of those tags offline +with the standard git-tag(1) process.

+

reset

+

Creates (or recreates) the named branch, optionally starting from +a specific revision. The reset command allows a frontend to issue +a new from command for an existing branch, or to create a new +branch from an existing commit without creating a new commit.

+
+
+
        'reset' SP <ref> LF
+        ('from' SP <committish> LF)?
+        LF?
+
+

For a detailed description of <ref> and <committish> see above +under commit and from.

+

The LF after the command is optional (it used to be required).

+

The reset command can also be used to create lightweight +(non-annotated) tags. For example:

+
+
+
+
+
reset refs/tags/938
+from :938
+
+
+

would create the lightweight tag refs/tags/938 referring to +whatever commit mark :938 references.

+

blob

+

Requests writing one file revision to the packfile. The revision +is not connected to any commit; this connection must be formed in +a subsequent commit command by referencing the blob through an +assigned mark.

+
+
+
        'blob' LF
+        mark?
+        data
+
+

The mark command is optional here as some frontends have chosen +to generate the Git SHA-1 for the blob on their own, and feed that +directly to commit. This is typically more work than it's worth +however, as marks are inexpensive to store and easy to use.

+

data

+

Supplies raw data (for use as blob/file content, commit messages, or +annotated tag messages) to fast-import. Data can be supplied using an exact +byte count or delimited with a terminating line. Real frontends +intended for production-quality conversions should always use the +exact byte count format, as it is more robust and performs better. +The delimited format is intended primarily for testing fast-import.

+

Comment lines appearing within the <raw> part of data commands +are always taken to be part of the body of the data and are therefore +never ignored by fast-import. This makes it safe to import any +file/message content whose lines might start with #.

+
+
+Exact byte count format +
+
+

+ The frontend must specify the number of bytes of data. +

+
+
+
        'data' SP <count> LF
+        <raw> LF?
+
+

where <count> is the exact number of bytes appearing within +<raw>. The value of <count> is expressed as an ASCII decimal +integer. The LF on either side of <raw> is not +included in <count> and will not be included in the imported data.

+

The LF after <raw> is optional (it used to be required) but +recommended. Always including it makes debugging a fast-import +stream easier as the next command always starts in column 0 +of the next line, even if <raw> did not end with an LF.

+
+
+Delimited format +
+
+

+ A delimiter string is used to mark the end of the data. + fast-import will compute the length by searching for the delimiter. + This format is primarily useful for testing and is not + recommended for real data. +

+
+
+
        'data' SP '<<' <delim> LF
+        <raw> LF
+        <delim> LF
+        LF?
+
+

where <delim> is the chosen delimiter string. The string <delim> +must not appear on a line by itself within <raw>, as otherwise +fast-import will think the data ends earlier than it really does. The LF +immediately trailing <raw> is part of <raw>. This is one of +the limitations of the delimited format: it is impossible to supply +a data chunk which does not have an LF as its last byte.

+

The LF after <delim> LF is optional (it used to be required).

+
+
+

checkpoint

+

Forces fast-import to close the current packfile, start a new one, and to +save out all current branch refs, tags and marks.

+
+
+
        'checkpoint' LF
+        LF?
+
+

Note that fast-import automatically switches packfiles when the current +packfile reaches --max-pack-size, or 4 GiB, whichever limit is +smaller. During an automatic packfile switch fast-import does not update +the branch refs, tags or marks.

+

As a checkpoint can require a significant amount of CPU time and +disk IO (to compute the overall pack SHA-1 checksum, generate the +corresponding index file, and update the refs) it can easily take +several minutes for a single checkpoint command to complete.

+

Frontends may choose to issue checkpoints during extremely large +and long running imports, or when they need to allow another Git +process access to a branch. However given that a 30 GiB Subversion +repository can be loaded into Git through fast-import in about 3 hours, +explicit checkpointing may not be necessary.

+

The LF after the command is optional (it used to be required).

+

progress

+

Causes fast-import to print the entire progress line unmodified to +its standard output channel (file descriptor 1) when the command is +processed from the input stream. The command otherwise has no impact +on the current import, or on any of fast-import's internal state.

+
+
+
        'progress' SP <any> LF
+        LF?
+
+

The <any> part of the command may contain any sequence of bytes +that does not contain LF. The LF after the command is optional. +Callers may wish to process the output through a tool such as sed to +remove the leading part of the line, for example:

+
+
+
+
+
frontend | git-fast-import | sed 's/^progress //'
+
+
+

Placing a progress command immediately after a checkpoint will +inform the reader when the checkpoint has been completed and it +can safely access the refs that fast-import updated.

+
+

Crash Reports

+
+

If fast-import is supplied invalid input it will terminate with a +non-zero exit status and create a crash report in the top level of +the Git repository it was importing into. Crash reports contain +a snapshot of the internal fast-import state as well as the most +recent commands that led up to the crash.

+

All recent commands (including stream comments, file changes and +progress commands) are shown in the command history within the crash +report, but raw file data and commit messages are excluded from the +crash report. This exclusion saves space within the report file +and reduces the amount of buffering that fast-import must perform +during execution.

+

After writing a crash report fast-import will close the current +packfile and export the marks table. This allows the frontend +developer to inspect the repository state and resume the import from +the point where it crashed. The modified branches and tags are not +updated during a crash, as the import did not complete successfully. +Branch and tag information can be found in the crash report and +must be applied manually if the update is needed.

+

An example crash:

+
+
+
+
+
$ cat >in <<END_OF_INPUT
+# my very first test commit
+commit refs/heads/master
+committer Shawn O. Pearce <spearce> 19283 -0400
+# who is that guy anyway?
+data <<EOF
+this is my commit
+EOF
+M 644 inline .gitignore
+data <<EOF
+.gitignore
+EOF
+M 777 inline bob
+END_OF_INPUT
+
+
+
+
$ git-fast-import <in
+fatal: Corrupt mode: M 777 inline bob
+fast-import: dumping crash report to .git/fast_import_crash_8434
+
+
+
+
$ cat .git/fast_import_crash_8434
+fast-import crash report:
+    fast-import process: 8434
+    parent process     : 1391
+    at Sat Sep 1 00:58:12 2007
+
+
+
+
fatal: Corrupt mode: M 777 inline bob
+
+
+
+
Most Recent Commands Before Crash
+---------------------------------
+  # my very first test commit
+  commit refs/heads/master
+  committer Shawn O. Pearce <spearce> 19283 -0400
+  # who is that guy anyway?
+  data <<EOF
+  M 644 inline .gitignore
+  data <<EOF
+* M 777 inline bob
+
+
+
+
Active Branch LRU
+-----------------
+    active_branches = 1 cur, 5 max
+
+
+
+
pos  clock name
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 1)      0 refs/heads/master
+
+
+
+
Inactive Branches
+-----------------
+refs/heads/master:
+  status      : active loaded dirty
+  tip commit  : 0000000000000000000000000000000000000000
+  old tree    : 0000000000000000000000000000000000000000
+  cur tree    : 0000000000000000000000000000000000000000
+  commit clock: 0
+  last pack   :
+
+
+
+
-------------------
+END OF CRASH REPORT
+
+
+
+

Tips and Tricks

+
+

The following tips and tricks have been collected from various +users of fast-import, and are offered here as suggestions.

+

Use One Mark Per Commit

+

When doing a repository conversion, use a unique mark per commit +(mark :<n>) and supply the --export-marks option on the command +line. fast-import will dump a file which lists every mark and the Git +object SHA-1 that corresponds to it. If the frontend can tie +the marks back to the source repository, it is easy to verify the +accuracy and completeness of the import by comparing each Git +commit to the corresponding source revision.

+

Coming from a system such as Perforce or Subversion this should be +quite simple, as the fast-import mark can also be the Perforce changeset +number or the Subversion revision number.

+

Freely Skip Around Branches

+

Don't bother trying to optimize the frontend to stick to one branch +at a time during an import. Although doing so might be slightly +faster for fast-import, it tends to increase the complexity of the frontend +code considerably.

+

The branch LRU built in to fast-import tends to behave very well, and the +cost of activating an inactive branch is so low that bouncing around +between branches has virtually no impact on import performance.

+

Handling Renames

+

When importing a renamed file or directory, simply delete the old +name(s) and modify the new name(s) during the corresponding commit. +Git performs rename detection after-the-fact, rather than explicitly +during a commit.

+

Use Tag Fixup Branches

+

Some other SCM systems let the user create a tag from multiple +files which are not from the same commit/changeset, or create +tags which are a subset of the files available in the repository.

+

Importing these tags as-is in Git is impossible without making at +least one commit which “fixes up” the files to match the content +of the tag. Use fast-import's reset command to reset a dummy branch +outside of your normal branch space to the base commit for the tag, +then commit one or more file fixup commits, and finally tag the +dummy branch.

+

For example, since all normal branches are stored under refs/heads/, +name the tag fixup branch TAG_FIXUP. This way it is impossible for +the fixup branch used by the importer to have namespace conflicts +with real branches imported from the source (the name TAG_FIXUP +is not refs/heads/TAG_FIXUP).

+

When committing fixups, consider using merge to connect the +commit(s) which are supplying file revisions to the fixup branch. +Doing so will allow tools such as git-blame(1) to track +through the real commit history and properly annotate the source +files.

+

After fast-import terminates the frontend will need to do rm .git/TAG_FIXUP +to remove the dummy branch.

+

Import Now, Repack Later

+

As soon as fast-import completes the Git repository is completely valid +and ready for use. Typically this takes only a very short time, +even for considerably large projects (100,000+ commits).

+

However repacking the repository is necessary to improve data +locality and access performance. It can also take hours on extremely +large projects (especially if -f and a large --window parameter is +used). Since repacking is safe to run alongside readers and writers, +run the repack in the background and let it finish when it finishes. +There is no reason to wait to explore your new Git project!

+

If you choose to wait for the repack, don't try to run benchmarks +or performance tests until repacking is completed. fast-import outputs +suboptimal packfiles that are simply never seen in real use +situations.

+

Repacking Historical Data

+

If you are repacking very old imported data (e.g. older than the +last year), consider expending some extra CPU time and supplying +--window=50 (or higher) when you run git-repack(1). +This will take longer, but will also produce a smaller packfile. +You only need to expend the effort once, and everyone using your +project will benefit from the smaller repository.

+

Include Some Progress Messages

+

Every once in a while have your frontend emit a progress message +to fast-import. The contents of the messages are entirely free-form, +so one suggestion would be to output the current month and year +each time the current commit date moves into the next month. +Your users will feel better knowing how much of the data stream +has been processed.

+
+

Packfile Optimization

+
+

When packing a blob fast-import always attempts to deltify against the last +blob written. Unless specifically arranged for by the frontend, +this will probably not be a prior version of the same file, so the +generated delta will not be the smallest possible. The resulting +packfile will be compressed, but will not be optimal.

+

Frontends which have efficient access to all revisions of a +single file (for example reading an RCS/CVS ,v file) can choose +to supply all revisions of that file as a sequence of consecutive +blob commands. This allows fast-import to deltify the different file +revisions against each other, saving space in the final packfile. +Marks can be used to later identify individual file revisions during +a sequence of commit commands.

+

The packfile(s) created by fast-import do not encourage good disk access +patterns. This is caused by fast-import writing the data in the order +it is received on standard input, while Git typically organizes +data within packfiles to make the most recent (current tip) data +appear before historical data. Git also clusters commits together, +speeding up revision traversal through better cache locality.

+

For this reason it is strongly recommended that users repack the +repository with git repack -a -d after fast-import completes, allowing +Git to reorganize the packfiles for faster data access. If blob +deltas are suboptimal (see above) then also adding the -f option +to force recomputation of all deltas can significantly reduce the +final packfile size (30-50% smaller can be quite typical).

+
+

Memory Utilization

+
+

There are a number of factors which affect how much memory fast-import +requires to perform an import. Like critical sections of core +Git, fast-import uses its own memory allocators to amortize any overheads +associated with malloc. In practice fast-import tends to amortize any +malloc overheads to 0, due to its use of large block allocations.

+

per object

+

fast-import maintains an in-memory structure for every object written in +this execution. On a 32 bit system the structure is 32 bytes, +on a 64 bit system the structure is 40 bytes (due to the larger +pointer sizes). Objects in the table are not deallocated until +fast-import terminates. Importing 2 million objects on a 32 bit system +will require approximately 64 MiB of memory.

+

The object table is actually a hashtable keyed on the object name +(the unique SHA-1). This storage configuration allows fast-import to reuse +an existing or already written object and avoid writing duplicates +to the output packfile. Duplicate blobs are surprisingly common +in an import, typically due to branch merges in the source.

+

per mark

+

Marks are stored in a sparse array, using 1 pointer (4 bytes or 8 +bytes, depending on pointer size) per mark. Although the array +is sparse, frontends are still strongly encouraged to use marks +between 1 and n, where n is the total number of marks required for +this import.

+

per branch

+

Branches are classified as active and inactive. The memory usage +of the two classes is significantly different.

+

Inactive branches are stored in a structure which uses 96 or 120 +bytes (32 bit or 64 bit systems, respectively), plus the length of +the branch name (typically under 200 bytes), per branch. fast-import will +easily handle as many as 10,000 inactive branches in under 2 MiB +of memory.

+

Active branches have the same overhead as inactive branches, but +also contain copies of every tree that has been recently modified on +that branch. If the subtree "include" has not been modified since the +branch became active, its contents will not be loaded into memory, +but if the subtree "src" has been modified by a commit since the branch +became active, then its contents will be loaded in memory.

+

As active branches store metadata about the files contained on that +branch, their in-memory storage size can grow to a considerable size +(see below).

+

fast-import automatically moves active branches to inactive status based on +a simple least-recently-used algorithm. The LRU chain is updated on +each commit command. The maximum number of active branches can be +increased or decreased on the command line with --active-branches=.

+

per active tree

+

Trees (aka directories) use just 12 bytes of memory on top of the +memory required for their entries (see “per active file” below). +The cost of a tree is virtually 0, as its overhead amortizes out +over the individual file entries.

+

per active file entry

+

Files (and pointers to subtrees) within active trees require 52 or 64 +bytes (32/64 bit platforms) per entry. To conserve space, file and +tree names are pooled in a common string table, allowing the filename +“Makefile” to use just 16 bytes (after including the string header +overhead) no matter how many times it occurs within the project.

+

The active branch LRU, when coupled with the filename string pool +and lazy loading of subtrees, allows fast-import to efficiently import +projects with 2,000+ branches and 45,114+ files in a very limited +memory footprint (less than 2.7 MiB per active branch).

+
+

Author

+
+

Written by Shawn O. Pearce <spearce@spearce.org>.

+
+

Documentation

+
+

Documentation by Shawn O. Pearce <spearce@spearce.org>.

+
+

GIT

+
+

Part of the git(7) suite

+
+ + + +P (7Uk}”ÛáëÜ Ü \ No newline at end of file diff --git a/doc/svn-dump.txt b/doc/svn-dump.txt new file mode 100644 index 0000000..1c0226d --- /dev/null +++ b/doc/svn-dump.txt @@ -0,0 +1,255 @@ +This file describes the format produced by 'svnadmin dump' and +consumed by 'svnadmin load'. + +The format has undergone revisions over time. They are presented in +reverse chronological order here. You may wish to start with the +VERSION 1 description in order to get a baseline understanding first. + +===== SVN DUMPFILE VERSION 3 FORMAT ===== + +(generated by SVN versions 1.1.0-present, if requested by the user) + +This format is equivalent to the VERSION 2 format except for the +following: + +1.) The format starts with the new version number of the dump format + ("SVN-fs-dump-format-version: 3\n"). + +2.) There are three new optional headers for node changes: + +[Text-delta: true|false] +[Prop-delta: true|false] +[Text-delta-base-md5: blob] + + The default value for the boolean headers is "false". If the value is + set to "true", then the text and property contents will be treated + as deltas against the previous contents of the node (as determined + by copy history for adds with history, or by the value in the + previous revision for changes--just as with commits). + +Property deltas have the same format as regular property lists except +that (1) properties with the same value as in the previous contents of +the node are not printed, and (2) deleted properties will be written +out as + +D + + +just as a regular property is printed, but with the "K " changed to a +"D " and with no value part. + +Text deltas are written out as a series of svndiff0 windows. If +Text-delta-base-md5 is provided, it is the checksum of the base to +which the text delta is applied; note that older versions (pre-1.5) of +'svnadmin load' may ignore the checksum. 
+ +===== SVN DUMPFILE VERSION 2 FORMAT ===== + +(generated by SVN versions 0.18.0-present, by default) + +This format is equivalent to the VERSION 1 format in every respect, +except for the following: + +1.) The format starts with the new version number of the dump format + ("SVN-fs-dump-format-version: 2\n"). + +2.) In addition to "Revision Records", another sort of record is supported: + the "UUID" record, which should be of the form: + +UUID: 7bf7a5ef-cabf-0310-b7d4-93df341afa7e + + This should be used to indicate the UUID of the originating repository. + +===== SVN DUMPFILE VERSION 1 FORMAT ===== + +(generated by SVN versions prior to 0.18.0) + +The binary format starts with the version number of the dump format +("SVN-fs-dump-format-version: 1\n"), followed by a series of revision +records. Each revision record starts with information about the +revision, followed by a variable number of node changes for that +revision. Fields in [braces] are optional, and unknown headers are +always ignored, for backwards compatibility. + +Revision-number: N +Prop-content-length: P +Content-length: L + + ...P bytes of property data. Properties are stored in the same + human-readable hashdump format used by working copy property files, + except that they end with "PROPS-END\n" for better readability. + +Node-path: /absolute/path/to/node/in/filesystem +Node-kind: file | dir (1) +Node-action: change | add | delete | replace +[Node-copyfrom-rev: X] +[Node-copyfrom-path: /path ] +[Text-copy-source-md5: blob] (2) +[Text-content-md5: blob] +[Text-content-length: T] +[Prop-content-length: P] +Content-length: Y (3) + + ... Y bytes of content data, divided into P bytes of "property" + data and T bytes of "text" data. The properties come first; their + total length (including formatting) is Prop-content-length, and is + included in Node-content-length. The "PROPS-END\n" line always + terminates the property section if there are props. 
The remainder + of the Y bytes (expected to be equivalent to Text-content-length) + represent the contents of the node. + + +Notes: + + (1) if the node represents a deletion, this field is optional. + + (2) this is a checksum of the source of the copy. a loader process + can use this checksum to determine that the copyfrom path/rev + already present in a filesystem is really the *correct* one to + use. + + (3) the Content-length header is technically unnecessary, since the + information it holds (and more) can be found in the + Prop-content-length and Text-content-length fields. Though + Subversion itself does not make use of the header when reading + a dumpfile, we include it for compatibility with generic RFC822 + parsers. + + (4) There are actually 2 types of version 1 dump streams. The + regular ones are generated since r2634 (svn 0.14.0). Older ones + also claim to be version 1, but miss the Props-content-length + and Text-content-length fields in the block header. In those + days there *always* was a properties block. + +EXAMPLE: + +Here's an example of revision 1422, whereby I added a new directory +"baz", added a new file "bop" inside it, and modified the file "foo.c": + +Revision-number: 1422 +Prop-content-length: 80 +Content-length: 80 + +K 6 +author +V 7 +sussman +K 3 +log +V 33 +Added two files, changed a third. +PROPS-END + +Node-path: bar/baz +Node-kind: dir +Node-action: add +Prop-content-length: 35 +Content-length: 35 + +K 10 +svn:ignore +V 4 +TAGS +PROPS-END + + +Node-path: bar/baz/bop +Node-kind: file +Node-action: add +Prop-content-length: 76 +Text-content-length: 54 +Content-length: 130 + +K 14 +svn:executable +V 2 +on +K 12 +svn:keywords +V 15 +LastChangedDate +PROPS-END +Here is the text of the newly added 'bop' file. +Whee. + +Node-path: bar/foo.c +Node-kind: file +Node-action: change +Text-content-length: 102 +Content-length: 102 + +Here is the fulltext of my change to an existing /bar/foo.c. +Notice that this file has no properties. 
+ +-*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- + +Old discussion: + +(This file started as a proposal, preserved here for posterity.) + +A proposal for an svn filesystem dump/restore format. + +Two problems we want to solve +============================= + + 1. When we change our node-id schema, we need to migrate all of our + data (by dumping and restoring). + + 2. Serves as a backup format. Could be read by other software tools + someday. + + +Design Goals +============ + + A. Written as two new public functions in svn_fs.h. To be invoked + by new 'svnadmin' subcommands. + + B. Format uses only timeless fs concepts. + + The dump format needs to reference concepts that we *know* are + general enough to never change. These concepts must exist + independently of any internal node-id schema, or any DB storage + backend. In other words, we're talking about the basic ideas in + our original "design spec" from May 2000. + + +Format Semantics +================ + +Here are the timeless semantics of our fs design -- the things that +would be stored in our dump format. + + - A filesystem is an array of trees. + Each tree is called a "revision" and has unversioned properties attached. + + - A revision has a tree of "nodes" hanging off of it. + Actually, the nodes in the filesystem form a DAG. A revision + always points to an initial node that represents the 'root' of some tree. + + - The majority of a tree's nodes are hard-links (references) to + nodes that were created in earlier trees. + + - A node contains + + - versioned text + - versioned properties + - predecessor history: "which node am I a variant of?" + - copy history: "which node am I a copy of?" + + The history values can be non-existent (meaning the node is + completely new), or can have a value of {revision, path}. 
+ + +------------------------------------------------------------------------ +Refinement of proposal #2: (after discussion with gstein) +========================= + +Each node starts with RFC822-style headers at the top. The final +header is a 'Content-length:', followed by the content, so record +boundaries can be inferred. + +The content section has two implicit parts: a property hash, and the +fulltext. The division between these two sections is implied by the +"PROPS-END\n" tag at the end of the prophash. In the case of a +directory node or a revision, only the prophash is present. diff --git a/git2svn b/git2svn new file mode 100755 index 0000000..aabb38f --- /dev/null +++ b/git2svn @@ -0,0 +1,355 @@ +#!/usr/bin/perl +# git2svn, converts a git branch to a svn ditto +# Copyright (C) 2008 Love Hörnquist Åstrand +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+
+use strict;
+use warnings;
+use POSIX qw(strftime);
+
+# Handles for the git fast-export stream (input) and the svn dump being
+# written (output).  Both are opened by the main program below and are
+# treated as raw byte streams, because every length in both formats is a
+# byte count.
+my $IN;
+my $OUT;
+
+# svn
+my $svntree = "repro";
+my $basedir = "trunk";
+
+# git
+my $branch = "master";
+my $gittree;
+my $syncname;
+my $masterrev;
+my $fexport;
+
+# Number of the next svn revision to emit.
+my $revision = 1;
+
+# mark => content, for every blob seen in the fast-export stream.  Entries
+# are kept for the whole run because a later path or commit may refer to
+# the same mark again (identical file content is exported only once).
+my %blob;
+
+# svn path (including $basedir) => 1 for a directory, 2 for a file, for
+# every node known to exist in the svn repository.
+my %paths;
+
+# Read the next line of the stream, without its trailing newline.
+# Returns undef at end of stream.
+sub next_line
+{
+    my $in = shift;
+    my $line = <$in>;
+    chomp($line) if (defined $line);
+    return $line;
+}
+
+# Like next_line, but the stream ending here is a fatal error.
+sub require_line
+{
+    my $in = shift;
+    my $line = next_line($in);
+    die "unexpected end of fast-export stream" unless (defined $line);
+    return $line;
+}
+
+# Read the payload of a fast-import "data <count>" command.
+#   $in   - stream to read from
+#   $next - the already consumed "data <count>" line
+# Returns exactly <count> bytes.  Dies if $next is not a counted data
+# command (the delimited "data <<EOF" form is not supported) or if the
+# stream is too short.
+sub read_data
+{
+    my ($in, $next) = @_;
+
+    unless (defined $next && $next =~ m/^data (\d+)/) {
+        die "missing data: " . (defined $next ? $next : "(end of stream)");
+    }
+    my $length = $1;
+
+    my $data = "";
+    my $got = read($in, $data, $length);
+    die "failed to read data: $!" unless (defined $got);
+    die "failed to read data $got != $length" unless ($got == $length);
+    return $data;
+}
+
+# Format one property in the svn dump "K len / key / V len / value" form.
+sub prop
+{
+    my ($key, $value) = @_;
+    return "K " . length($key) . "\n$key\nV " . length($value) . "\n$value\n";
+}
+
+# Record every node that already exists in the svn repository at $url in
+# %paths, and set $revision to the number of the next revision.
+sub parse_svn_tree
+{
+    my $url = shift;
+
+    # "svn ls -R" prints paths relative to $url, one per line, with a
+    # trailing slash on directories.  They already start with $basedir,
+    # which is the same form checkdirs() and the node writers use as keys.
+    open(my $ls, "-|", "svn", "ls", "-R", $url)
+        or die "failed to open svn ls -R $url";
+    while (my $entry = <$ls>) {
+        chomp($entry);
+        next if ($entry eq "");
+        if ($entry =~ m@^(.*)/$@) {
+            $paths{$1} = 1;
+        } else {
+            $paths{$entry} = 2;
+        }
+    }
+    close($ls);
+
+    open(my $info, "-|", "svn", "info", $url)
+        or die "failed to open svn info $url";
+    while (my $line = <$info>) {
+        if ($line =~ /^Revision: (\d+)/) {
+            $revision = $1 + 1;
+            last;
+        }
+    }
+    close($info);
+}
+
+# Resolve $name to a commit id inside $gittree.  Returns "" when the name
+# does not exist.  Going through git (rather than reading the ref file)
+# peels annotated tags to the commit they point at.
+sub git_commit
+{
+    my $name = shift;
+    my $cmd = "cd $gittree && git rev-parse --verify '" . $name .
+        "^{commit}' 2>/dev/null";
+    my $rev = `$cmd`;
+    $rev = "" unless (defined $rev);
+    chomp($rev);
+    return $rev;
+}
+
+# Work out which commits have to be exported: everything on the branch for
+# a first run, or only the commits after the sync tag for an incremental
+# run.  Sets $masterrev and $fexport; creates the svn repository on a
+# first run; exits when there is nothing new.
+sub parse_git_tree
+{
+    $masterrev = git_commit("refs/heads/${branch}");
+    die "cannot find branch $branch in $gittree" if ($masterrev eq "");
+
+    my $oldmasterrev = git_commit("refs/tags/${syncname}");
+
+    if ($oldmasterrev ne "") {
+        if ($oldmasterrev eq $masterrev) {
+            print STDERR "nothing to sync\n";
+            exit 0;
+        }
+
+        die "no $svntree, but incremental ? ".
+            "(delete tag $syncname to restart)" unless (-d $svntree);
+
+        $fexport = "$oldmasterrev..$masterrev";
+    } else {
+        $fexport = "${masterrev}";
+
+        unless (-d $svntree) {
+            system("svnadmin", "create", "./$svntree") == 0
+                or die "svnadmin create ./$svntree failed";
+        }
+    }
+}
+
+# Emit "add" nodes for every parent directory of $path (a full svn path,
+# including $basedir) that does not exist yet.
+sub checkdirs
+{
+    my $path = shift;
+    my $base = "";
+
+    # pick first dir, create, take next dir, continue until we reached basename
+    while ($path =~ m@^([^/]+)/(.*)$@) {
+        my $first = $base . $1;
+        $path = $2;
+        $base = $first . "/";
+        next if ($paths{$first});
+
+        $paths{$first} = 1;
+
+        print $OUT "Node-path: $first\n";
+        print $OUT "Node-kind: dir\n";
+        print $OUT "Node-action: add\n";
+        print $OUT "Prop-content-length: 0\n";
+        print $OUT "Content-length: 0\n";
+        print $OUT "\n";
+    }
+}
+
+# Emit the node for one "M <mode> <dataref> <path>" file change.
+#   $mode    - numeric git file mode
+#   $path    - path inside the git tree
+#   $content - file content (for a symlink: the link target)
+sub write_file_node
+{
+    my ($mode, $path, $content) = @_;
+    my $node = "$basedir/$path";
+
+    checkdirs($node);
+
+    my $action = "add";
+    if ($paths{$node}) {
+        die "file was a dir" if ($paths{$node} != 2);
+        $action = "change";
+    } else {
+        $paths{$node} = 2;
+    }
+
+    my $type = $mode & 0777000;
+    my $props = "";
+    if ($type == 0100000 || $type == 0) {
+        # regular file; fast-import also allows the short modes 644/755
+        $props .= prop("svn:executable", "on") if ($mode & 0111);
+    } elsif ($type == 0120000) {
+        # svn has no symlink node kind: a link is a file carrying the
+        # svn:special property whose text is "link <target>"
+        $props .= prop("svn:special", "*");
+        $content = "link " . $content;
+    } else {
+        die "$type unknown";
+    }
+    # A property block is terminated by PROPS-END, and that line counts
+    # towards Prop-content-length.
+    # TODO: no block is written when there are no props, so an executable
+    # bit that was dropped in git stays set in svn.
+    $props .= "PROPS-END\n" if ($props ne "");
+
+    my $plen = length($props);
+    my $clen = length($content);
+
+    print $OUT "Node-path: $node\n";
+    print $OUT "Node-kind: file\n";
+    print $OUT "Node-action: $action\n";
+    print $OUT "Prop-content-length: $plen\n" if ($plen);
+    print $OUT "Text-content-length: $clen\n";
+    print $OUT "Content-length: " . ($clen + $plen) . "\n";
+    print $OUT "\n";
+    print $OUT $props;
+    print $OUT $content;
+    print $OUT "\n";
+}
+
+# Emit the node for one "D <path>" file change.
+sub write_delete_node
+{
+    my $path = shift;
+    my $node = "$basedir/$path";
+
+    die "deleting non existing object" unless ($paths{$node});
+
+    delete $paths{$node};
+
+    print $OUT "Node-path: $node\n";
+    print $OUT "Node-action: delete\n";
+    print $OUT "\n";
+}
+
+# Convert one commit into one svn revision.  The "commit <ref>" line has
+# already been consumed from $in.  Returns the first line that does not
+# belong to the commit (undef at end of stream).
+sub convert_commit
+{
+    my $in = shift;
+    my %commit;
+
+    my $next = require_line($in);
+    if ($next =~ m/^mark (.*)/) {
+        # commit marks are not needed for a linear export
+        $next = require_line($in);
+    }
+    if ($next =~ m/^author (.*)/) {
+        $commit{author} = $1;
+        $next = require_line($in);
+    }
+    # the timezone may be east (+) or west (-) of UTC
+    unless ($next =~ m/^committer (.*) <([^>]*)> (\d+) ([-+]\d+)$/) {
+        die "missing comitter"
+    }
+
+    $commit{CommitterName} = $1;
+    $commit{CommitterEmail} = $2;
+    $commit{CommitterWhen} = $3;
+    $commit{CommitterTZ} = $4;
+
+    $next = require_line($in);
+    my $message = read_data($in, $next);
+
+    # Skip the optional LF after the message, the parent ("from") and any
+    # number of "merge" lines; svn history is linear so none are needed.
+    $next = next_line($in);
+    $next = next_line($in) if (defined $next && $next eq "");
+    $next = next_line($in) if (defined $next && $next =~ m/^from /);
+    while (defined $next && $next =~ m/^merge /) {
+        $next = next_line($in);
+    }
+
+    my $date =
+        strftime("%Y-%m-%dT%H:%M:%S.000000Z",
+                 gmtime($commit{CommitterWhen}));
+
+    # the svn author is the local part of the committer's mail address
+    my $author = "(no author)";
+    if ($commit{CommitterEmail} =~ m/([^@]+)/) {
+        $author = $1;
+    }
+    $author = "git2svn-dump" if ($author eq "(no author)");
+
+    my $props = "";
+    $props .= prop("svn:author", $author);
+    $props .= prop("svn:log", $message);
+    $props .= prop("svn:date", $date);
+    $props .= "PROPS-END\n";
+
+    # push out svn info
+
+    print $OUT "Revision-number: $revision\n";
+    $revision++;
+    print $OUT "Prop-content-length: " . length($props) . "\n";
+    print $OUT "Content-length: " . length($props) . "\n";
+    print $OUT "\n";
+    print $OUT $props;
+    print $OUT "\n";
+
+    # file changes, up to the first line that is something else
+    while (defined $next) {
+        if ($next eq "") {
+            # optional LF after inline data or at the end of the commit
+        } elsif ($next =~ m/^M (\d+) (\S+) (.*)$/) {
+            my ($mode, $dataref, $path) = (oct $1, $2, $3);
+            my $content;
+            if ($dataref eq "inline") {
+                $content = read_data($in, require_line($in));
+            } else {
+                die "unknown blob $dataref" unless (exists $blob{$dataref});
+                $content = $blob{$dataref};
+            }
+            write_file_node($mode, $path, $content);
+        } elsif ($next =~ m/^D (.*)/) {
+            write_delete_node($1);
+        } elsif ($next =~ m/^C (.*)/) {
+            die "file copy ?";
+        } elsif ($next =~ m/^R (.*)/) {
+            die "file rename ?";
+        } elsif ($next =~ m/^filedeleteall$/) {
+            die "file delete all ?";
+        } else {
+            last;
+        }
+        $next = next_line($in);
+    }
+    return $next;
+}
+
+$|= 1;
+
+# parse arguments here ....
+
+die "too few arguments" if ($#ARGV < 1);
+
+mkdir ".data" unless (-d ".data");
+
+$syncname = "git2svn-syncpoint-${branch}";
+
+my $gitdump = ".data/git.dump-${branch}";
+my $svndump = ".data/svn.dump-${branch}";
+my $log = "log-${branch}";
+
+
+$gittree = $ARGV[0];
+$svntree = $ARGV[1];
+
+parse_git_tree($gittree);
+
+my $cwd = `pwd`;
+chomp($cwd);
+parse_svn_tree("file://" . $cwd ."/". $svntree);
+
+system(">$log");
+
+print STDERR "git fast-export $branch ($fexport)\n";
+
+system("(cd $gittree && git fast-export $fexport) > $gitdump 2>$log") == 0 or
+    die "git fast-export: exit status $?";
+
+open($IN, "<", $gitdump) or
+    die "failed to open $gitdump";
+binmode($IN);
+
+open($OUT, ">", $svndump) or
+    die "failed to open $svndump";
+binmode($OUT);
+
+print STDERR "creating svn dump from revision $revision...\n";
+
+print $OUT "SVN-fs-dump-format-version: 3\n";
+
+# $next always holds the next line that has not been handled yet; the loop
+# ends when the stream is exhausted.
+my $next = next_line($IN);
+COMMAND: while (defined $next) {
+    if ($next eq "") {
+        $next = next_line($IN);
+        next COMMAND;
+    } elsif ($next =~ /^commit (.*)/) {
+        # convert_commit returns the first line after the commit
+        $next = convert_commit($IN);
+        next COMMAND;
+    } elsif ($next =~ /^tag /) {
+        # tags are not converted; skip "from", "tagger" and the message
+        $next = next_line($IN);
+        $next = next_line($IN) if (defined $next && $next =~ m/^from /);
+        $next = next_line($IN) if (defined $next && $next =~ m/^tagger /);
+        read_data($IN, $next);
+    } elsif ($next =~ /^reset /) {
+        # resets are not converted; skip the optional "from" line
+        $next = next_line($IN);
+        next COMMAND unless (defined $next && $next =~ m/^from /);
+    } elsif ($next =~ /^blob/) {
+        my $mark;
+        $next = next_line($IN);
+        if (defined $next && $next =~ m/^mark (.*)/) {
+            $mark = $1;
+            $next = next_line($IN);
+        }
+        my $data = read_data($IN, $next);
+        $blob{$mark} = $data if (defined $mark);
+    } elsif ($next =~ /^checkpoint$/) {
+    } elsif ($next =~ /^progress (.*)/) {
+        print STDERR "progress: $1\n";
+    } else {
+        die "unknown command $next";
+    }
+    $next = next_line($IN);
+}
+
+close($IN);
+close($OUT) or
+    die "failed to write $svndump";
+
+# Load first and move the sync tag afterwards, so that a failed load does
+# not mark these commits as already synced.
+print STDERR "loading dump into svn\n";
+
+system("svnadmin load $svntree < $svndump >>$log 2>&1") == 0 or
+    die "svnadmin load";
+
+print STDERR "(re-)setting sync-tag to new master\n";
+
+# \$(date) is escaped so that the shell, not perl, expands it
+system("cd $gittree && ".
+       "git tag -m \"sync \$(date)\" -a -f ${syncname} ${masterrev}");
+
+unlink($svndump, $gitdump, $log);
+
+exit 0;
diff --git a/run-tests.sh b/run-tests.sh
new file mode 100644
index 0000000..c080ec7
--- /dev/null
+++ b/run-tests.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+./git2svn /Users/lha/src/heimdal/git-trunk repro
-- 
2.11.4.GIT