remove outproc for hh
[hiphop-php.git] / hphp / hack / src / naming / names_rust / naming_sqlite.rs
blob8df067aae8ad035a9409d55ad8195fa1f8f13e1f
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 //
3 // This source code is licensed under the MIT license found in the
4 // LICENSE file in the "hack" directory of this source tree.
6 use std::path::Path;
8 use anyhow::Context;
9 use hh24_types::Checksum;
10 use hh24_types::DeclHash;
11 use hh24_types::ToplevelCanonSymbolHash;
12 use hh24_types::ToplevelSymbolHash;
13 use oxidized::file_info::NameType;
14 use relative_path::RelativePath;
15 use rusqlite::params;
16 use rusqlite::Connection;
17 use rusqlite::OptionalExtension;
19 pub struct Names {
20     conn: rusqlite::Connection,
23 impl Names {
24     pub fn from_file(path: impl AsRef<Path>) -> anyhow::Result<Self> {
25         let path = path.as_ref();
26         let mut conn = Connection::open(path)?;
27         Self::create_tables(&mut conn)?;
28         Self::create_indices(&mut conn)?;
29         Ok(Self { conn })
30     }
32     pub fn new_in_memory() -> anyhow::Result<Self> {
33         let mut conn = Connection::open_in_memory()?;
34         Self::create_tables(&mut conn)?;
35         Self::create_indices(&mut conn)?;
36         Ok(Self { conn })
37     }
39     pub fn from_connection(mut conn: Connection) -> anyhow::Result<Self> {
40         Self::create_tables(&mut conn)?;
41         Self::create_indices(&mut conn)?;
42         Ok(Self { conn })
43     }
45     pub fn backup(&self, path: &Path) -> anyhow::Result<()> {
46         self.conn.backup(rusqlite::DatabaseName::Main, path, None)?;
47         Ok(())
48     }
50     /// These pragmas make things faster at the expense of write safety...
51     /// * journal_mode=OFF -- no rollback possible: the ROLLBACK TRANSACTION command won't work
52     /// * synchronous=OFF -- sqlite will return immediately after handing off writes to the OS, so data will be lost upon power-loss
53     /// * temp_store=MEMORY -- temporary tables and indices kept in memory
54     pub fn pragma_fast_but_not_durable(&self) -> anyhow::Result<()> {
55         self.conn.execute_batch(
56             "PRAGMA journal_mode = OFF;
57             PRAGMA synchronous = OFF;
58             PRAGMA temp_store = MEMORY;",
59         )?;
60         Ok(())
61     }
63     /// This does a sql "BEGIN EXCLUSIVE TRANSACTION".
64     /// (an 'exclusive' transaction is one that acquires a write lock immediately rather than lazily).
65     /// Then, once you call transaction.end() or drop it, "END TRANSACTION".
66     /// The main reason to use transactions is for speed.
67     /// (Note: if you opened naming-sqlite with "from_file_non_durable" then it
68     /// doesn't support ROLLBACK TRANSACTION).
69     pub fn transaction(&self) -> anyhow::Result<Transaction<'_>> {
70         Transaction::new(&self.conn)
71     }
73     fn create_tables(conn: &mut Connection) -> anyhow::Result<()> {
74         conn.execute(
75             "
76             CREATE TABLE IF NOT EXISTS NAMING_SYMBOLS (
77                 HASH INTEGER PRIMARY KEY NOT NULL,
78                 CANON_HASH INTEGER NOT NULL,
79                 DECL_HASH INTEGER NOT NULL,
80                 FLAGS INTEGER NOT NULL,
81                 FILE_INFO_ID INTEGER NOT NULL
82             );",
83             params![],
84         )?;
86         conn.execute(
87             "
88             CREATE TABLE IF NOT EXISTS NAMING_SYMBOLS_OVERFLOW (
89                 HASH INTEGER KEY NOT NULL,
90                 CANON_HASH INTEGER NOT NULL,
91                 DECL_HASH INTEGER NOT NULL,
92                 FLAGS INTEGER NOT NULL,
93                 FILE_INFO_ID INTEGER NOT NULL
94             );",
95             params![],
96         )?;
98         conn.execute(
99             "
100             CREATE TABLE IF NOT EXISTS NAMING_FILE_INFO (
101                 FILE_INFO_ID INTEGER PRIMARY KEY AUTOINCREMENT,
102                 PATH_PREFIX_TYPE INTEGER NOT NULL,
103                 PATH_SUFFIX TEXT NOT NULL,
104                 TYPE_CHECKER_MODE INTEGER,
105                 DECL_HASH INTEGER,
106                 CLASSES TEXT,
107                 CONSTS TEXT,
108                 FUNS TEXT,
109                 TYPEDEFS TEXT,
110                 MODULES TEXT
111             );",
112             params![],
113         )?;
115         conn.execute(
116             "
117             CREATE TABLE IF NOT EXISTS CHECKSUM (
118                 ID INTEGER PRIMARY KEY,
119                 CHECKSUM_VALUE INTEGER NOT NULL
120             );
121             ",
122             params![],
123         )?;
125         conn.execute(
126             "INSERT OR IGNORE INTO CHECKSUM (ID, CHECKSUM_VALUE) VALUES (0, 0);",
127             params![],
128         )?;
130         // This table contains a single dummy value and is here only to satisfy
131         // hh_server and hh_single_type_check.
132         conn.execute(
133             "
134             CREATE TABLE IF NOT EXISTS NAMING_LOCAL_CHANGES(
135                 ID INTEGER PRIMARY KEY,
136                 LOCAL_CHANGES BLOB NOT NULL,
137                 BASE_CONTENT_VERSION TEXT
138             );",
139             params![],
140         )?;
142         // The blob here is Relative_path.Map.empty as an OCaml-marshaled blob.
143         // The base_content_version (computed from the unix timestamp and a
144         // random ID) needs only be unique.
145         conn.execute(
146             "
147             INSERT OR IGNORE INTO NAMING_LOCAL_CHANGES
148             VALUES(0,X'8495a6be0000000100000000000000000000000040',?);",
149             params![format!(
150                 "{}-{}",
151                 std::time::SystemTime::now()
152                     .duration_since(std::time::SystemTime::UNIX_EPOCH)
153                     .expect("SystemTime::now() before UNIX_EPOCH")
154                     .as_secs(),
155                 {
156                     use rand::distributions::DistString;
157                     rand::distributions::Alphanumeric.sample_string(&mut rand::thread_rng(), 10)
158                 },
159             )],
160         )?;
162         Ok(())
163     }
165     pub fn create_indices(conn: &mut Connection) -> anyhow::Result<()> {
166         conn.execute(
167             "CREATE UNIQUE INDEX IF NOT EXISTS FILE_INFO_PATH_IDX
168              ON NAMING_FILE_INFO (PATH_SUFFIX, PATH_PREFIX_TYPE);",
169             params![],
170         )?;
172         conn.execute(
173             "CREATE INDEX IF NOT EXISTS TYPES_CANON
174              ON NAMING_SYMBOLS (CANON_HASH);",
175             params![],
176         )?;
178         Ok(())
179     }
181     pub fn get_checksum(&self) -> anyhow::Result<Checksum> {
182         Ok(self
183             .conn
184             .prepare_cached("SELECT CHECKSUM_VALUE FROM CHECKSUM")?
185             .query_row(params![], |row| row.get(0))?)
186     }
188     pub fn set_checksum(&self, checksum: Checksum) -> anyhow::Result<()> {
189         self.conn
190             .prepare_cached("REPLACE INTO CHECKSUM (ID, CHECKSUM_VALUE) VALUES (0, ?);")?
191             .execute(params![checksum])?;
192         Ok(())
193     }
195     // private helper for `save_file_summary`/`build`
196     fn insert_file_summary(
197         &self,
198         path: &RelativePath,
199         summary: &crate::FileSummary,
200         save_result: &mut crate::SaveResult,
201     ) -> anyhow::Result<()> {
202         let file_info_id = self.insert_file_info_and_get_file_id(path, summary)?;
203         save_result.files_added += 1;
204         self.insert_symbols(path, file_info_id, summary.funs(), save_result)?;
205         self.insert_symbols(path, file_info_id, summary.consts(), save_result)?;
206         self.insert_symbols(path, file_info_id, summary.classes(), save_result)?;
207         self.insert_symbols(path, file_info_id, summary.typedefs(), save_result)?;
208         self.insert_symbols(path, file_info_id, summary.modules(), save_result)?;
209         Ok(())
210     }
212     // private helper for insert_file_summary
213     fn insert_symbols<'a>(
214         &self,
215         path: &RelativePath,
216         file_id: crate::FileInfoId,
217         items: impl Iterator<Item = &'a crate::DeclSummary>,
218         save_result: &mut crate::SaveResult,
219     ) -> anyhow::Result<()> {
220         for item in items {
221             self.try_insert_symbol(path, file_id, item.clone(), save_result)
222                 .with_context(|| {
223                     format!(
224                         "Failed to insert {:?} {} (defined in {path})",
225                         item.name_type, item.symbol
226                     )
227                 })?
228         }
229         Ok(())
230     }
232     // private helper for insert_symbols
233     fn try_insert_symbol(
234         &self,
235         path: &RelativePath,
236         file_info_id: crate::FileInfoId,
237         item: crate::DeclSummary,
238         save_result: &mut crate::SaveResult,
239     ) -> anyhow::Result<()> {
240         let mut insert_statement = self.conn.prepare_cached(
241             "INSERT INTO NAMING_SYMBOLS (HASH, CANON_HASH, DECL_HASH, FLAGS, FILE_INFO_ID)
242             VALUES (?, ?, ?, ?, ?);",
243         )?;
244         let mut insert_overflow_statement = self.conn.prepare_cached(
245             "INSERT INTO NAMING_SYMBOLS_OVERFLOW (HASH, CANON_HASH, DECL_HASH, FLAGS, FILE_INFO_ID)
246             VALUES (?, ?, ?, ?, ?);",
247         )?;
248         let mut delete_statement = self.conn.prepare_cached(
249             "DELETE FROM NAMING_SYMBOLS
250             WHERE HASH = ? AND FILE_INFO_ID = ?",
251         )?;
252         let symbol_hash = ToplevelSymbolHash::new(item.name_type, &item.symbol);
253         let canon_hash = ToplevelCanonSymbolHash::new(item.name_type, item.symbol.clone());
254         let decl_hash = item.hash;
255         let kind = item.name_type;
257         if let Some(old) = self.get_row(symbol_hash)? {
258             assert_eq!(symbol_hash, old.hash);
259             assert_eq!(canon_hash, old.canon_hash);
260             // check if new entry appears first alphabetically
261             if path < &old.path {
262                 // delete old row from naming_symbols table
263                 delete_statement.execute(params![symbol_hash, old.file_info_id])?;
265                 // insert old row into overflow table
266                 insert_overflow_statement.execute(params![
267                     symbol_hash,
268                     canon_hash,
269                     old.decl_hash,
270                     old.kind,
271                     old.file_info_id
272                 ])?;
274                 // insert new row into naming_symbols table
275                 insert_statement.execute(params![
276                     symbol_hash,
277                     canon_hash,
278                     decl_hash,
279                     kind,
280                     file_info_id
281                 ])?;
283                 save_result
284                     .checksum
285                     .addremove(symbol_hash, old.decl_hash, &old.path); // remove old
286                 save_result.checksum.addremove(symbol_hash, decl_hash, path); // add new
287                 save_result.add_collision(kind, item.symbol, &old.path, path);
288             } else {
289                 // insert new row into overflow table
290                 insert_overflow_statement.execute(params![
291                     symbol_hash,
292                     canon_hash,
293                     decl_hash,
294                     kind,
295                     file_info_id
296                 ])?;
297                 save_result.add_collision(kind, item.symbol, &old.path, path);
298             }
299         } else {
300             // No collision. Insert as you normally would
301             insert_statement.execute(params![
302                 symbol_hash,
303                 canon_hash,
304                 decl_hash,
305                 kind,
306                 file_info_id
307             ])?;
308             save_result.checksum.addremove(symbol_hash, decl_hash, path);
309             save_result.symbols_added += 1;
310         }
311         Ok(())
312     }
314     /// Gets all overflow rows in the reverse naming table for a given symbol hash,
315     /// and joins with the forward naming table to resolve filenames.
316     pub fn get_overflow_rows_unordered(
317         &self,
318         symbol_hash: ToplevelSymbolHash,
319     ) -> anyhow::Result<Vec<crate::SymbolRow>> {
320         let select_statement = "
321         SELECT
322             NAMING_SYMBOLS_OVERFLOW.HASH,
323             NAMING_SYMBOLS_OVERFLOW.CANON_HASH,
324             NAMING_SYMBOLS_OVERFLOW.DECL_HASH,
325             NAMING_SYMBOLS_OVERFLOW.FLAGS,
326             NAMING_SYMBOLS_OVERFLOW.FILE_INFO_ID,
327             NAMING_FILE_INFO.PATH_PREFIX_TYPE,
328             NAMING_FILE_INFO.PATH_SUFFIX
329         FROM
330             NAMING_SYMBOLS_OVERFLOW
331         LEFT JOIN
332             NAMING_FILE_INFO
333         ON
334             NAMING_SYMBOLS_OVERFLOW.FILE_INFO_ID = NAMING_FILE_INFO.FILE_INFO_ID
335         WHERE
336             NAMING_SYMBOLS_OVERFLOW.HASH = ?
337         ";
339         let mut select_statement = self.conn.prepare_cached(select_statement)?;
340         let mut rows = select_statement.query(params![symbol_hash])?;
341         let mut result = vec![];
342         while let Some(row) = rows.next()? {
343             let prefix: crate::datatypes::SqlitePrefix = row.get(5)?;
344             let suffix: crate::datatypes::SqlitePathBuf = row.get(6)?;
345             let path = RelativePath::make(prefix.value, suffix.value);
346             result.push(crate::SymbolRow {
347                 hash: row.get(0)?,
348                 canon_hash: row.get(1)?,
349                 decl_hash: row.get(2)?,
350                 kind: row.get(3)?,
351                 file_info_id: row.get(4)?,
352                 path,
353             });
354         }
355         Ok(result)
356     }
358     /// Gets the winning entry for a symbol from the reverse naming table,
359     /// and joins with forward-naming-table to get filename.
360     pub fn get_row(
361         &self,
362         symbol_hash: ToplevelSymbolHash,
363     ) -> anyhow::Result<Option<crate::SymbolRow>> {
364         let select_statement = "
365         SELECT
366             NAMING_SYMBOLS.HASH,
367             NAMING_SYMBOLS.CANON_HASH,
368             NAMING_SYMBOLS.DECL_HASH,
369             NAMING_SYMBOLS.FLAGS,
370             NAMING_SYMBOLS.FILE_INFO_ID,
371             NAMING_FILE_INFO.PATH_PREFIX_TYPE,
372             NAMING_FILE_INFO.PATH_SUFFIX
373         FROM
374             NAMING_SYMBOLS
375         LEFT JOIN
376             NAMING_FILE_INFO
377         ON
378             NAMING_SYMBOLS.FILE_INFO_ID = NAMING_FILE_INFO.FILE_INFO_ID
379         WHERE
380             NAMING_SYMBOLS.HASH = ?
381         ";
383         let mut select_statement = self.conn.prepare_cached(select_statement)?;
384         let result = select_statement
385             .query_row(params![symbol_hash], |row| {
386                 let prefix: crate::datatypes::SqlitePrefix = row.get(5)?;
387                 let suffix: crate::datatypes::SqlitePathBuf = row.get(6)?;
388                 let path = RelativePath::make(prefix.value, suffix.value);
389                 Ok(crate::SymbolRow {
390                     hash: row.get(0)?,
391                     canon_hash: row.get(1)?,
392                     decl_hash: row.get(2)?,
393                     kind: row.get(3)?,
394                     file_info_id: row.get(4)?,
395                     path,
396                 })
397             })
398             .optional();
400         Ok(result?)
401     }
403     /// This looks up the reverse naming table by hash, to fetch the decl-hash
404     pub fn get_decl_hash(
405         &self,
406         symbol_hash: ToplevelSymbolHash,
407     ) -> anyhow::Result<Option<DeclHash>> {
408         let result = self
409             .conn
410             .prepare_cached("SELECT DECL_HASH FROM NAMING_SYMBOLS WHERE HASH = ?")?
411             .query_row(params![symbol_hash], |row| row.get(0))
412             .optional();
413         Ok(result?)
414     }
416     /// Looks up reverse-naming-table winner by symbol hash.
417     /// Similar to get_path_by_symbol_hash, but includes the name kind.
418     pub fn get_filename(
419         &self,
420         symbol_hash: ToplevelSymbolHash,
421     ) -> anyhow::Result<Option<(RelativePath, NameType)>> {
422         let select_statement = "
423         SELECT
424             NAMING_FILE_INFO.PATH_PREFIX_TYPE,
425             NAMING_FILE_INFO.PATH_SUFFIX,
426             NAMING_SYMBOLS.FLAGS
427         FROM
428             NAMING_SYMBOLS
429         LEFT JOIN
430             NAMING_FILE_INFO
431         ON
432             NAMING_SYMBOLS.FILE_INFO_ID = NAMING_FILE_INFO.FILE_INFO_ID
433         WHERE
434             NAMING_SYMBOLS.HASH = ?
435         LIMIT 1
436         ";
438         let mut select_statement = self.conn.prepare_cached(select_statement)?;
439         let result = select_statement
440             .query_row(params![symbol_hash], |row| {
441                 let prefix: crate::datatypes::SqlitePrefix = row.get(0)?;
442                 let suffix: crate::datatypes::SqlitePathBuf = row.get(1)?;
443                 let kind: NameType = row.get(2)?;
444                 Ok((RelativePath::make(prefix.value, suffix.value), kind))
445             })
446             .optional();
448         Ok(result?)
449     }
451     /// Looks up reverse-naming-table winner by symbol hash.
452     /// Similar to get_filename, but discards the name kind
453     pub fn get_path_by_symbol_hash(
454         &self,
455         symbol_hash: ToplevelSymbolHash,
456     ) -> anyhow::Result<Option<RelativePath>> {
457         match self.get_filename(symbol_hash)? {
458             Some((path, _kind)) => Ok(Some(path)),
459             None => Ok(None),
460         }
461     }
463     /// Looks up reverse-naming-table winner by case-insensitive symbol hash.
464     pub fn get_path_case_insensitive(
465         &self,
466         symbol_hash: ToplevelCanonSymbolHash,
467     ) -> anyhow::Result<Option<RelativePath>> {
468         let select_statement = "
469         SELECT
470             NAMING_FILE_INFO.PATH_PREFIX_TYPE,
471             NAMING_FILE_INFO.PATH_SUFFIX
472         FROM
473             NAMING_SYMBOLS
474         LEFT JOIN
475             NAMING_FILE_INFO
476         ON
477             NAMING_SYMBOLS.FILE_INFO_ID = NAMING_FILE_INFO.FILE_INFO_ID
478         WHERE
479         NAMING_SYMBOLS.CANON_HASH = ?
480         ";
482         let mut select_statement = self.conn.prepare_cached(select_statement)?;
483         let result = select_statement
484             .query_row(params![symbol_hash], |row| {
485                 let prefix: crate::datatypes::SqlitePrefix = row.get(0)?;
486                 let suffix: crate::datatypes::SqlitePathBuf = row.get(1)?;
487                 Ok(RelativePath::make(prefix.value, suffix.value))
488             })
489             .optional();
491         Ok(result?)
492     }
494     /// This function shouldn't really exist.
495     /// It searches the reverse-naming-table by case-insensitive hash.
496     /// Then looks up the forward-naming-table entry for that winner.
497     /// Then it iterates the string type names stored in that forward-naming-table entry,
498     /// comparing them one by one until it finds one whose case-insensitive hash
499     /// matches what was asked for.
500     pub fn get_type_name_case_insensitive(
501         &self,
502         symbol_hash: ToplevelCanonSymbolHash,
503     ) -> anyhow::Result<Option<String>> {
504         let select_statement = "
505         SELECT
506             NAMING_FILE_INFO.CLASSES,
507             NAMING_FILE_INFO.TYPEDEFS
508         FROM
509             NAMING_SYMBOLS
510         LEFT JOIN
511             NAMING_FILE_INFO
512         ON
513             NAMING_SYMBOLS.FILE_INFO_ID = NAMING_FILE_INFO.FILE_INFO_ID
514         WHERE
515             NAMING_SYMBOLS.CANON_HASH = ?
516         LIMIT 1
517         ";
519         let mut select_statement = self.conn.prepare_cached(select_statement)?;
520         let names_opt = select_statement
521             .query_row(params![symbol_hash], |row| {
522                 let classes: Option<String> = row.get(0)?;
523                 let typedefs: Option<String> = row.get(1)?;
524                 Ok((classes, typedefs))
525             })
526             .optional()?;
528         if let Some((classes, typedefs)) = names_opt {
529             for class in classes.as_deref().unwrap_or_default().split_terminator('|') {
530                 if symbol_hash == ToplevelCanonSymbolHash::from_type(class.to_owned()) {
531                     return Ok(Some(class.to_owned()));
532                 }
533             }
534             for typedef in typedefs
535                 .as_deref()
536                 .unwrap_or_default()
537                 .split_terminator('|')
538             {
539                 if symbol_hash == ToplevelCanonSymbolHash::from_type(typedef.to_owned()) {
540                     return Ok(Some(typedef.to_owned()));
541                 }
542             }
543         }
544         Ok(None)
545     }
547     /// This function shouldn't really exist.
548     /// It searches the reverse-naming-table by case-insensitive hash.
549     /// Then looks up the forward-naming-table entry for that winner.
550     /// Then it iterates the string fun names stored in that forward-naming-table entry,
551     /// comparing them one by one until it finds one whose case-insensitive hash
552     /// matches what was asked for.
553     pub fn get_fun_name_case_insensitive(
554         &self,
555         symbol_hash: ToplevelCanonSymbolHash,
556     ) -> anyhow::Result<Option<String>> {
557         let select_statement = "
558         SELECT
559             NAMING_FILE_INFO.FUNS
560         FROM
561             NAMING_SYMBOLS
562         LEFT JOIN
563             NAMING_FILE_INFO
564         ON
565             NAMING_SYMBOLS.FILE_INFO_ID = NAMING_FILE_INFO.FILE_INFO_ID
566         WHERE
567             NAMING_SYMBOLS.CANON_HASH = ?
568         LIMIT 1
569         ";
571         let mut select_statement = self.conn.prepare_cached(select_statement)?;
572         let names_opt = select_statement
573             .query_row(params![symbol_hash], |row| {
574                 let funs: Option<String> = row.get(0)?;
575                 Ok(funs)
576             })
577             .optional()?;
579         if let Some(funs) = names_opt {
580             for fun in funs.as_deref().unwrap_or_default().split_terminator('|') {
581                 if symbol_hash == ToplevelCanonSymbolHash::from_fun(fun.to_owned()) {
582                     return Ok(Some(fun.to_owned()));
583                 }
584             }
585         }
586         Ok(None)
587     }
589     /// This function will return an empty list if the path doesn't exist in the forward naming table
590     pub fn get_symbol_hashes_for_winners_and_losers(
591         &self,
592         path: &RelativePath,
593     ) -> anyhow::Result<Vec<(ToplevelSymbolHash, crate::FileInfoId)>> {
594         // The NAMING_FILE_INFO table stores e.g. "Classname1|Classname2|Classname3". This
595         // helper turns it into a vec of (ToplevelSymbolHash,file_info_id) pairs
596         fn split(
597             s: Option<String>,
598             f: impl Fn(&str) -> ToplevelSymbolHash,
599             file_info_id: crate::FileInfoId,
600         ) -> Vec<(ToplevelSymbolHash, crate::FileInfoId)> {
601             match s {
602                 None => vec![],
603                 Some(s) => s
604                     .split_terminator('|')
605                     .map(|name| (f(name), file_info_id))
606                     .collect(),
607             }
608             // s.split_terminator yields an empty list for an empty string;
609             // s.split would have yielded a singleton list, which we don't want.
610         }
612         let mut results = vec![];
613         self.conn
614             .prepare_cached(
615                 "SELECT CLASSES, CONSTS, FUNS, TYPEDEFS, MODULES, FILE_INFO_ID FROM NAMING_FILE_INFO
616                 WHERE PATH_PREFIX_TYPE = ?
617                 AND PATH_SUFFIX = ?",
618             )?
619             .query_row(params![path.prefix() as u8, path.path_str()], |row| {
620                 let file_info_id: crate::FileInfoId = row.get(5)?;
621                 results.extend(split(row.get(0)?, ToplevelSymbolHash::from_type, file_info_id));
622                 results.extend(split(row.get(1)?, ToplevelSymbolHash::from_const, file_info_id));
623                 results.extend(split(row.get(2)?, ToplevelSymbolHash::from_fun, file_info_id));
624                 results.extend(split(row.get(3)?, ToplevelSymbolHash::from_type, file_info_id));
625                 results.extend(split(row.get(4)?, ToplevelSymbolHash::from_module, file_info_id));
626                 Ok(())
627             })
628             .optional()?;
629         Ok(results)
630     }
632     /// This inserts an item into the forward naming table.
633     fn insert_file_info_and_get_file_id(
634         &self,
635         path_rel: &RelativePath,
636         file_summary: &crate::FileSummary,
637     ) -> anyhow::Result<crate::FileInfoId> {
638         let prefix_type = path_rel.prefix() as u8; // TODO(ljw): shouldn't this use prefix_to_i64?
639         let suffix = path_rel.path().to_str().unwrap();
640         let type_checker_mode = crate::datatypes::convert::mode_to_i64(file_summary.mode);
641         let hash = file_summary.hash;
643         self.conn
644             .prepare_cached(
645                 "INSERT INTO NAMING_FILE_INFO(
646                 PATH_PREFIX_TYPE,
647                 PATH_SUFFIX,
648                 TYPE_CHECKER_MODE,
649                 DECL_HASH,
650                 CLASSES,
651                 CONSTS,
652                 FUNS,
653                 TYPEDEFS,
654                 MODULES
655             )
656             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9);",
657             )?
658             .execute(params![
659                 prefix_type,
660                 suffix,
661                 type_checker_mode,
662                 hash,
663                 Self::join_with_pipe(file_summary.classes()),
664                 Self::join_with_pipe(file_summary.consts()),
665                 Self::join_with_pipe(file_summary.funs()),
666                 Self::join_with_pipe(file_summary.typedefs()),
667                 Self::join_with_pipe(file_summary.modules()),
668             ])?;
669         let file_info_id = crate::FileInfoId::last_insert_rowid(&self.conn);
670         Ok(file_info_id)
671     }
673     fn join_with_pipe<'a>(symbols: impl Iterator<Item = &'a crate::DeclSummary>) -> Option<String> {
674         let s = symbols
675             .map(|summary| summary.symbol.as_str())
676             .collect::<Vec<_>>()
677             .join("|");
678         if s.is_empty() { None } else { Some(s) }
679     }
681     /// This updates the forward naming table.
682     /// It will replace the existing entry (preserving file_info_id) if file was present,
683     /// or add a new entry (with new file_info_id) otherwise.
684     /// It returns the file_info_id.
685     /// Note: it never deletes a row.
686     /// TODO(ljw): reconcile with existing delete() and insert_file_info_and_get_file_id()
687     pub fn fwd_update(
688         &self,
689         path: &RelativePath,
690         parsed_file: Option<&oxidized_by_ref::direct_decl_parser::ParsedFile<'_>>,
691     ) -> anyhow::Result<crate::FileInfoId> {
692         let file_info_id_opt = self
693             .conn
694             .prepare_cached(
695                 "SELECT FILE_INFO_ID FROM NAMING_FILE_INFO
696                 WHERE PATH_PREFIX_TYPE = ?
697                 AND PATH_SUFFIX = ?",
698             )?
699             .query_row(params![path.prefix() as u8, path.path_str()], |row| {
700                 row.get::<usize, crate::FileInfoId>(0)
701             })
702             .optional()?;
704         let file_info_id = match file_info_id_opt {
705             Some(file_info_id) => file_info_id,
706             None => {
707                 self.conn
708                 .prepare_cached("INSERT INTO NAMING_FILE_INFO(PATH_PREFIX_TYPE,PATH_SUFFIX) VALUES (?1, ?2);")?
709                 .execute(params![
710                     path.prefix() as u8,
711                     path.path_str(),
712                 ])?;
713                 crate::FileInfoId::last_insert_rowid(&self.conn)
714             }
715         };
717         // This helper takes a list of (name,decl) pairs and turns into string "name1|name2|..."
718         fn join<'a, Decl, I: Iterator<Item = (&'a str, Decl)>>(i: I, sep: &'static str) -> String {
719             i.map(|(name, _decl)| name).collect::<Vec<&str>>().join(sep)
720         }
722         let decls_or_empty = parsed_file
723             .map_or_else(oxidized_by_ref::direct_decl_parser::Decls::empty, |pf| {
724                 pf.decls
725             });
726         self.conn
727             .prepare_cached(
728                 "
729                 UPDATE NAMING_FILE_INFO
730                 SET TYPE_CHECKER_MODE=?, DECL_HASH=?, CLASSES=?, CONSTS=?, FUNS=?, TYPEDEFS=?, MODULES=?
731                 WHERE FILE_INFO_ID=?
732                 ",
733             )?
734             .execute(params![
735                 crate::datatypes::convert::mode_to_i64(parsed_file.and_then(|pf| pf.mode)),
736                 crate::hash_decls(&decls_or_empty),
737                 join(decls_or_empty.classes(), "|"),
738                 join(decls_or_empty.consts(), "|"),
739                 join(decls_or_empty.funs(), "|"),
740                 join(decls_or_empty.typedefs(), "|"),
741                 join(decls_or_empty.modules(), "|"),
742                 file_info_id,
743             ])?;
745         Ok(file_info_id)
746     }
748     /// This removes an entry from the forward naming table.
749     /// TODO(ljw): reconcile with delete.
750     pub fn fwd_delete(&self, file_info_id: crate::FileInfoId) -> anyhow::Result<()> {
751         self.conn
752             .prepare_cached("DELETE FROM NAMING_FILE_INFO WHERE FILE_INFO_ID = ?")?
753             .execute(params![file_info_id])?;
754         Ok(())
755     }
757     /// This updates the reverse naming-table NAMING_SYMBOLS and NAMING_SYMBOLS_OVERFLOW
758     /// by removing all old entries, then inserting the new entries.
759     /// TODO(ljw): remove previous implementations of insert_file_summary.
760     pub fn rev_update(
761         &self,
762         symbol_hash: ToplevelSymbolHash,
763         winner: Option<&crate::SymbolRow>,
764         overflow: &[&crate::SymbolRow],
765     ) -> anyhow::Result<()> {
766         self.conn
767             .prepare("DELETE FROM NAMING_SYMBOLS WHERE HASH = ?")?
768             .execute(params![symbol_hash])?;
769         self.conn
770             .prepare("DELETE FROM NAMING_SYMBOLS_OVERFLOW WHERE HASH = ?")?
771             .execute(params![symbol_hash])?;
772         if let Some(symbol) = winner {
773             self.conn.prepare("INSERT INTO NAMING_SYMBOLS (HASH, CANON_HASH, DECL_HASH, FLAGS, FILE_INFO_ID) VALUES (?,?,?,?,?)")?
774             .execute(params![
775                 symbol.hash,
776                 symbol.canon_hash,
777                 symbol.decl_hash,
778                 symbol.kind,
779                 symbol.file_info_id
780             ])?;
781         }
782         for symbol in overflow {
783             self.conn.prepare("INSERT INTO NAMING_SYMBOLS_OVERFLOW (HASH, CANON_HASH, DECL_HASH, FLAGS, FILE_INFO_ID) VALUES (?,?,?,?,?)")?
784             .execute(params![
785                 symbol.hash,
786                 symbol.canon_hash,
787                 symbol.decl_hash,
788                 symbol.kind,
789                 symbol.file_info_id
790             ])?;
791         }
793         Ok(())
794     }
796     /// This creates a sqlite directory at the specified path.
797     /// It will fail if the directory containing that path doesn't exist.
798     pub fn build_at_path(
799         path: impl AsRef<Path>,
800         file_summaries: impl IntoIterator<Item = (RelativePath, crate::FileSummary)>,
801     ) -> anyhow::Result<Self> {
802         let path = path.as_ref();
803         let conn = Connection::open(path)?;
804         let log = slog::Logger::root(slog::Discard, slog::o!());
805         let (conn, _save_result) = Self::build(&log, conn, |tx| {
806             file_summaries.into_iter().try_for_each(|x| Ok(tx.send(x)?))
807         })?;
808         Ok(Self { conn })
809     }
811     /// Build a naming table using the information provided in
812     /// `collect_file_summaries` and writing to `conn`. The naming table will be
813     /// built on a background thread while `collect_file_summaries` is run. Once
814     /// all file summaries have been sent on the `Sender`, drop it (usually by
815     /// letting it go out of scope as `collect_file_summaries` terminates) to
816     /// allow building to continue.
817     pub fn build(
818         log: &slog::Logger,
819         mut conn: rusqlite::Connection,
820         collect_file_summaries: impl FnOnce(
821             crossbeam::channel::Sender<(RelativePath, crate::FileSummary)>,
822         ) -> anyhow::Result<()>,
823     ) -> anyhow::Result<(rusqlite::Connection, crate::SaveResult)> {
824         // We can't use rusqlite::Transaction for now because a lot of methods
825         // we want to use are on Self, and Self wants ownership of the
826         // Connection. Sqlite will automatically perform a rollback when the
827         // connection is closed, which will happen (via impl Drop for
828         // Connection) if we return Err here.
829         conn.execute("BEGIN TRANSACTION", params![])?;
830         Self::create_tables(&mut conn)?;
832         let mut names = Self { conn };
833         let save_result = crossbeam::thread::scope(|scope| -> anyhow::Result<_> {
834             let (tx, rx) = crossbeam::channel::unbounded();
836             // Write to the db serially, but concurrently with parsing
837             let names = &mut names;
838             let db_thread = scope.spawn(move |_| -> anyhow::Result<_> {
839                 let mut save_result = crate::SaveResult::default();
840                 while let Ok((path, summary)) = rx.recv() {
841                     names.insert_file_summary(&path, &summary, &mut save_result)?;
842                 }
843                 Ok(save_result)
844             });
846             // Parse files (in parallel, if the caller chooses)
847             collect_file_summaries(tx)?;
849             db_thread.join().unwrap()
850         })
851         .unwrap()?;
852         names.set_checksum(save_result.checksum)?;
853         let mut conn = names.conn;
855         slog::info!(log, "Creating indices...");
856         Self::create_indices(&mut conn)?;
858         slog::info!(log, "Closing DB transaction...");
859         conn.execute("END TRANSACTION", params![])?;
861         Ok((conn, save_result))
862     }
/// This token is for a transaction. When you construct it,
/// it does sql command "BEGIN EXCLUSIVE TRANSACTION".
/// When you call end() or drop it, it does sql command "END TRANSACTION".
/// end() reports a failure of that command to the caller, whereas
/// dropping the token ignores it.
/// (There's also a similar rusqlite::Transaction, but it takes &mut
/// ownership of the connection, which makes it awkward to work with
/// all our methods.)
pub struct Transaction<'a> {
    /// The connection the transaction was begun on. It becomes `None` once
    /// "END TRANSACTION" has been attempted, so that command is issued at
    /// most once per token.
    conn: Option<&'a rusqlite::Connection>,
875 impl<'a> Transaction<'a> {
876     fn new(conn: &'a rusqlite::Connection) -> anyhow::Result<Self> {
877         conn.execute_batch("BEGIN EXCLUSIVE TRANSACTION;")?;
878         Ok(Self { conn: Some(conn) })
879     }
881     pub fn end(mut self) -> anyhow::Result<()> {
882         if let Some(conn) = self.conn.take() {
883             conn.execute_batch("END TRANSACTION;")?;
884         }
885         Ok(())
886     }
889 impl<'a> Drop for Transaction<'a> {
890     fn drop(&mut self) {
891         if let Some(conn) = self.conn.take() {
892             let _ignore = conn.execute_batch("END TRANSACTION;");
893         }
894     }