import pandas as pd
import os
import openai
import sys, pathlib, fitz
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import TokenTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.chat_models import ChatOpenAI
from langchain.memory import VectorStoreRetrieverMemory
from langchain.memory import ConversationBufferMemory
from langchain import PromptTemplate
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
# from langchain.prompts.chat import (
# ChatPromptTemplate,
# SystemMessagePromptTemplate,
# AIMessagePromptTemplate,
# HumanMessagePromptTemplate,
# )
# from langchain.schema import AIMessage, HumanMessage, SystemMessage
Option 2
- Tiktoken Splitter
- OpenAI Embeddings
- FAISS
- Contextual Compress
- RetrievalQA
- ConversationalRetrievalQA
- Memory
from dotenv import load_dotenv, find_dotenv

# Read the local .env file; the return value is not needed, so it is discarded.
_ = load_dotenv(find_dotenv())  # read local .env file

# Hand the key to the openai client. Raises KeyError if OPENAI_API_KEY is unset.
openai.api_key = os.environ['OPENAI_API_KEY']
# Helper function for printing docs
def pretty_print_docs(docs):
    """Print each document's text under a numbered header.

    Consecutive documents are separated by a line of 100 dashes.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.
    """
    separator = f"\n{'-' * 100}\n"
    print(separator.join(f"Document {i}:\n\n" + d.page_content for i, d in enumerate(docs, start=1)))
Remove Citation Pages
From our Option 1 experiment, we noticed that the responses and source documents were being drawn from the reference pages and from non-relevant graphs in the appendix. Luckily, the appendix always comes after the references, so I created this simple function to drop the first page of references as well as all subsequent pages.
- This saves tokens, and we don't need to process irrelevant text
- Removes the risk of generating irrelevant data
def has_citations(page_text):
    """Return True if the page text contains a known citation-section heading.

    The check is a case-sensitive substring match.

    Args:
        page_text: Text of a single page.

    Returns:
        True if any heading variation occurs in ``page_text``, else False.
    """
    # List of variations of "citations" to check for
    citation_variations = ["Citations", "Literature Cited", "Literature Citations"]
    return any(variation in page_text for variation in citation_variations)
def remove_citation_pages(pdf_path):
    """Build a copy of a PDF without the pages that contain a citation heading.

    Args:
        pdf_path: Path of the PDF to open with ``fitz.open``.

    Returns:
        A new, unsaved ``fitz`` document holding the kept pages in their
        original order. The caller is responsible for saving and closing it.
    """
    doc = fitz.open(pdf_path)
    try:
        # NOTE(review): only pages whose own text matches has_citations() are
        # dropped; pages after the first match are kept unless they match too.
        # Confirm whether everything after the first match should go as well.
        non_citation_pages = [
            page_num
            for page_num in range(doc.page_count)
            if not has_citations(doc[page_num].get_text())
        ]

        # Create a new PDF with only non-citation pages
        new_pdf = fitz.open()
        for page_num in non_citation_pages:
            new_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)
    finally:
        # The pages have been copied into new_pdf, so the source can be released.
        doc.close()
    return new_pdf
input_pdf_path = './hms/fhl_2014_Charifson_34622 (1).pdf'

# Create a new PDF with citation pages removed
new_pdf = remove_citation_pages(input_pdf_path)

# Save the new PDF to 'output.pdf'
output_pdf_path = 'output.pdf'
new_pdf.save(output_pdf_path)
new_pdf.close()
Extract text from the new PDF file w/ reference pages removed
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF to open with ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output, with no
        separator added between pages.
    """
    doc = fitz.open(pdf_path)
    try:
        return "".join(page.get_text() for page in doc)
    finally:
        # Close the document even if text extraction raises.
        doc.close()
# Replace 'output.pdf' with the path to the PDF you created with citation pages removed
new_pdf_path = 'output.pdf'

# Extract text from the new PDF
new_pdf_text = extract_text_from_pdf(new_pdf_path)

# Now you can manipulate the extracted text or perform any other actions as needed
# For example, you can print the extracted text:
print(new_pdf_text)
Charifson 1
Snail Predation by Hemigrapsus nudus and Hemigrapsus oregonensis: Sex and Species
Differences in Chela Size.
David Charifson1,2
Marine Invertebrate Zoology
Summer 2014
1 Friday Harbor Laboratories, University of Washington, Friday Harbor, WA 98250
2 Department of Ecology and Evolution, Stony Brook University, Stony Brook, NY 11794-5245
Contact information:
David Charifson
Department of Ecology and Evolution
Stony Brook University
650 Life Sciences Building
Stony Brook, NY 11794-5245
David.charifson@stonybrook.edu
Keywords: Hemigrapsus nudus, Hemigrapsus orogenensis, Littorina scutulata, Lacuna vincta,
predation, sexual dimorphism, character displacement
Charifson 2
Abstract:
The relationship between propus size and use of snails as prey in sympatric populations
of Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Propal height and propal
width increases with body size in a predictable manner in male and female crabs of both species.
Sexual dimorphism of propal height and width relationships in both shore crabs is apparent, but
there was no detectible difference in chela size between H. nudus and H. oregonensis. The lack
of difference in chela size between sympatric H. nudus and H. oregonensis suggests that there is
no character displacement in this trait. Both species of crabs showed a strong feeding preference
for the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata, likely
due to the ease of consumption. There were no differences in snail consumption rates between
male and female H. nudus of similar propal heights, mostly due to high feeding variation among
individual crabs.
Introduction:
A multitude of species coexist in the marine intertidal environment. Closely related
species may be sympatric and compete for resources, such as food. Two species of shore crab,
Hemigrapsus nudus (Dana 1851) and Hemigrapsus oregonensis (Dana 1851), occur in sympatry
in the Salish Sea. Both species are sexually dimorphic, with the males having larger chelae.
Species in sympatry may exhibit character displacement, the exaggeration of morphological or
behavioral characteristics due to competition for resources (Brown and Wilson 1956). When H.
nudus and H. oregonensis co-occur, H. oregonensis tends to be found closer to the waterand in
finer sediment than the more desiccation-tolerant H. nudus (Sliger 1982). There is still
considerable habitat overlap between these two species; the underside of a single rock may have
Charifson 3
roughly equal abundances of the two crab species (personal observation). The size of the chela
may differ, which could allow the crabs to differentiate in food resource utilization.
Both species are omnivores that eat algae and small invertebrates, including snails
(Behrens Yamada and Boulding 1996). The two shore crab species are known to eat Littorina
sitkana (Philippi 1846) and Littorina scutulata (Gould 1849), which also occur in the rocky
intertidal zone (Behrens Yamada and Boulding 1996). Li. scutulata moves upshore in response
to the presence of H. nudus and crushed conspecifics, where H. nudus density is lower (Keppel
and Scrosati 2004). This suggests that H. nudus represents an ecologically significant predation
threat to littorines.
The thick shelled littorines are difficult prey for both H. nudus and H. oregonensis as
neither crab species is a molluscivore specialists; they typically scrape or pick algae and softer
invertebrates off rocks using their chelae (Behrens Yamada and Boulding 1996). Behrens
Yamada and Boulding (1998) found that large H. nudus were capable of consuming Li. sitkana
that were less than 8 mm in length, but had only a 37% success rate of consuming snails with
lengths between 5.5-7 mm .
I sought to investigate differences in propus size between between males and females of
these two crab species. The size of the propus of the chela is positively correlated with the
amount of force that can be produced due to greater musculature (Behrens Yamada and Boulding
1998). A significant difference in propus size between the two species would suggest that further
testing of character displacement may be warranted. A disparity in propus size to body size
between male and female crabs might result in different feeding rates and shell breaking
capabilities. I devised a test of preference by varying snail shell strength within the same size
class. The snail species Li. scutulata and Lacuna vincta (Montagu 1803) are both in the family
Charifson 4
Littorinidae and have similar shell shape but different shell thickness. Lacuna vincta has a
thinner, more fragile shell and is likely to be preferred as prey by shore crabs. I also hypothesize
that, if chela size is held constant, female crabs will have greater snail consumption rates than
male crabs. Female crabs with claws equivalent to males have a larger body mass,are likely to
have greater metabolic need, and are less easily satiated.
Study System:
All specimens were collected on San Juan Island, Washington. Two shore crabs (clade:
Brachyura: Family Grapsidae), Hemigrapsus oregonensis and Hemigrapsus nudus were
collected in the intertidal adjacent to the Friday Harbor Laboratories dock. These closely related
crabs are morphologically similar, but can be distinguished by the presence of purple spots on
the cheliped of H. nudus and abundant setae on the pereopods of H. oregonensis (Kozloff 1987).
Lacuna vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and
Dead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were
collected from the rocky intertidal zone of Fourth of July Beach.
Materials and Methods:
Morphometrics
Male and females of H. oregonensis and H. nudus were measured with digital calipers
(0.1 mm). Each crab was measured for carapace width, propal height and propal width. Carapace
width was taken at the widest part of the carapace. Propal height was measured at the highest
vertical distance along the propus. Propal width was measured perpendicular to propal height,
from the left to right sides of the propus. All claw measurements were made on the left cheliped.
Charifson 5
If there was a size disparity between the left and right claws due to regeneration of a lost claw,
the crab was excluded from the study. Ratios of carapace width:propal height (CW:PH) and
carapace width:propal width (CW:PW) were calculated.
Crab Feeding Preference
Three female (Fe1, Fe2, Fe3) and three male (Ma1, Ma2, Ma3) crabs of H. nudus with
propal heights between 5-6 mm were placed in small flow-through containers. Five L. scutulata
and five L. vincta with shell lengths between 4-6 mm were added to each container. The ranges
of crab propal height and snail shell length were chosen based on prior studies to ensure that the
crabs were capable of feeding on the hard-shelled L. scutulata (Behrens Yamada and Boulding
1998). H. oregonensis was excluded due to the rarity of female crabs of that species within the
range of appropriate propal heights. After approximately 15 hours the number of L. scutulata and
L. vincta consumed was counted. Three trials were conducted for each crab.
Snail Consumption by Hemigrapsus nudus
The crabs from the preference experiment were given five of each snail in each trial.
After 1.5 to 3 hours the number of snails consumed was counted and converted to a consumption
rate (snails consumed/hour). A total of eight feeding trials were conducted.
Statistical Analysis
For the morphometric analysis, crabs were divided into four groups based on sex and
species. Standardized major axis regression was used to determine the relationship between crab
morphometric parameters. Arcsin transformations were used to correct for non-normality (Sokol
and Rohlf 2011). Differences in CW:PH and CW:PW were analyzed using a two-way ANOVA
Charifson 6
with factors: Sex and Species. Prey selection by H. nudus was analyzed using G-tests for each
individual that consumed snails. An one-way ANOVA was used to determine differences in snail
consumption rates by H. nudus.
Results:
Morphometrics:
Carapace width was a good predictor of both propal height (Figure 1, Table 1A) and
propal width (Figure 2, Table 1B) in female and male H. oregonensis and H. nudus. There was
less variation in the relationship between carapace width and propal height than with propal
width. Due to non-normality of CW:PH and CW:PW an arcsin transformation was used. A
significant effect for sex was found for both CW:PH (F1, 45 = 125.6, p < 0.001) and CW:PW (F1,
45 = 103.81, p < 0.001). There was no significant difference between Hemigrapsus species for
CW:PH (F1, 45 < 0.01, p = 0.983) and CW:PW (F1, 45 = 0.09, p = 0.764). A significant
sex*species interaction was detected for both CW:PH (F1, 45 4.39, p = 0.042) and CW:PW (F1, 45
= 7.19, p = 0.010). Figure 3 and 4 show the means of CW:PH and CW:PW in by sex and species
respectively.
Crab Feeding Preference:
Expected values for both the number of L. scutulata and L. vincta consumed for the G-
test are 0.5 multiplied by the total number of snails consumed by each individual crab. G-tests
for individual H. nudus that eat snails were all significant at p < 0.001 with 1 degree of freedom
(See Table 2 for statistical summary). The three crabs that did not eat either L. scutulata or L.
vincta could not be tested for feeding preference.
Charifson 7
Snail Consumption by Hemigrapsus nudus
No significant difference in crab feeding rate (snails consumed/hour) was found (F2, 21 =
2.52, p = 0.104) among individual H. nudus that fed on snails during the course of the
consumption experiment (Figure 5). Fe2 ate snails at a rate of 0.59±0.53 snails/hour (variance is
standard deviation), Ma1 feeding rate was 0.24±0.31 snails/hour, and Ma2 had a feeding rate of
0.80±0.38 snails/hour. Individuals Fe1, Fe3 and Ma3 did not consume any snails during the
course of the experiment.
Discussion:
A few interesting patterns emerged in the morphometric analysis of Hemigrapsus nudus
and Hemigrapsus orogenensis. The variance in propal height or propal width explained by
carapace width was surprisingly high in female H. nudus with R2 values of 0.976 found for
propal height and 0.927 for propal width. Male H. oregenensis had the greatest variation in chela
size parameters, demonstrated by R2 of 0.693 and 0.534 for propal height and propal width
respectively. The R2 values for propal width regressed against carapace width were consistently
lower than propal height regressed against carapace width across species and sexes (Table 1). In
general the ability of carapace width to explain the variance in chela size parameters (i.e.: R2
values) was greater in females of both crab species (Table 1, Figures 1 and 2). Males may have
more variation in claw size due to trade-offs between claw size and factors like feeding
efficiency or resource allocation. Trade-offs relating to chela size are found in fiddler crabs,
which exhibit extreme sexual dimorphism. Fiddler crabs have a trade-off between the intrasexual
competition and intersexual signaling functions of the dimorphic claw (Swanson et al. 2013).
Charifson 8
This is not to say that the particular trade-offs in fiddler crabs case are analogous to that of
Hemigrapsus, since both claws in the two shore crabs in this study are used in feeding.
Sexual dimorphism in chela size, which is readily apparent to the eye in both
Hemigrapsus nudus and Hemigrapsus oregonensis, was detected statistically (Figures 3 and 4).
The two-way ANOVA with factors sex and species did not show a significant species effect,
suggesting that the claws of both shore crabs produce roughly the same force and are
functionally equivalent (Behrens Yamada and Boulding 1998). The lack of morphological
difference in the size of the chelae between the crab species removes the possibility of character
displacement in the trait. It should be noted that this study was not designed to test for character
displacement, but to determine if this would be an interesting question for future investigations.
To make a compelling case for character displacement it is necessary to compare multiple
sympatric and allopatric populations with similar abiotic and biotic conditions (Stuart and Losos
2013). Also it must be demonstrated that the trait differs due to genetic differences between
sympatric and allopatric populations, that the differences between the populations are not due to
species sorting, that the morphological trait is correlated to differences in resource use between
the two sympatric species, and that the similar phenotype in allopatric populations compete for
the same resources (Stuart and Losos 2013). The tendency of H. nudus to occupy the upper
intertidal zone (Sliger 1987), despite considerable overlap with H. oregonensis, may provide
enough differential resource use to prevent exclusion in sympatry. Desiccation tolerance may be
a better trait to test for character displacement in these shore crabs.
H. nudus males and females clearly preferred to eat the thinner shelled Lacuna vincta
(Table 2). During the course of the feeding preference experiment all the crabs that ate snails
consumed L. vincta exclusively. However, one Littorina scutuala was eaten by H. nudus when
Charifson 9
determining consumption rates. The six crabs were chosen because their claws were of
appropriate size to eat littorines with shell length between 4-6 mm, yet only half of the crabs ate
these snails. This could be due to insufficient time to acclimate H. nudus to lab conditions and
possibly reduced feeding associated with molting. Male snow crabs, Chionoecetes opilio, were
shown to cease feeding 3-6 weeks before and 3-4 weeks after molting (O’Halloran and O’Dor
1988).
It was my original intention to test for sex differences in consumption rates, but this was
not possible as I could not collect many crabs of the appropriate claw size for gastropod feeding
and many of the crabs I did collect did not feed. There were no statistically significant
differences in consumption rates (snails eaten/hour) among the three feeding H. nudus. These
individuals had highly variable feeding rates, mostly due to trials in which the crab did not feed.
Sylvia Behrens Yamada and Elizabeth Boulding (1998) found that H. nudus consumed 1.8
Littorina. sitkana/individual crab/day. In my study crabs fed on 13.03 Lacuna vincta/individual
crab/ day. The greater consumption rate in my study is likely due to the easily breakable shells of
La. vincta.
The findings of this study suggest that there is no potential for character displacement of
chela size in the sympatric H. nudus and H. oregonensis. This leaves the question of species co-
occurrence open to other possibilities. H. nudus has a strong preference for La. vincta, but it
seems as if there have been no studies that show predation on La. vincta in the field. It should be
noted that H. nudus and La. vincta usually occupy different portions of the intertidal and may
have little contact with each other, unlike the relationship between H. nudus and Li. scutulata.
There is some potential for overlap in the winter when La. vincta migrates up shore. Although no
differences in consumption rates between male and female H. nudus were found, this might be
Charifson 10
due to logistical issues surrounding the experimental design and little replication. Future studies
should include more crabs, the exclusion of non-feeding crabs, and trial periods with longer time
intervals. Additionally, any subsequent study should utilize multivariate and geometric
morphometric methods to quantify claw shape instead of using ratios, as two similar ratios may
have different shape.
Acknowledgements:
I would like to thank Dianna Padilla, Michael LaBarbera, and Kevin Turner for advice
relating to the experimental design of this study. I would also like to thank the director and staff
of Friday Harbor Laboratories for use of facilities and permission to collect organisms. Friday
Harbor Laboratories, The Libbie Hyman Scholarship, Society for Integrative and Comparative
Biology, and the Stony Brook Department of Ecology and Evolution provided financial support
and have my gratitude.
Charifson 12
Swanson, B. O., George, M. N., Anderson, S. T., and Christy, J. H. 2013. Evolutionary variation
in the mechanics of fiddler crab claws. BMC Evolutionary Biology 13: 137.
Charifson 13
Table 1: SMA regressions of carapace width and propus measures.
A) The relationship between carapace width and propal height. x is carapace width and y is
propal height. B) The relationship between carapace width and propal height. x is carapace width
and y is propal width.
SMA Regression
A
N
Carapace Width vs Propal Height
R2
Female H. nudus
13
y = 0.273*x - 0.678
0.976
Male H. nudus
13
y = 0.311*x - 1.385
0.868
Female H. oregonensis
9
y = 0.351*x - 0.833
0.894
Male H. oregonensis
14
y = 0.39*x - 1.149
0.693
SMA Regression
B
N
Carapace Width vs Propal Width
R2
Female H. nudus
13
y = 0.157*x - 0.386
0.927
Male H. nudus
13
y = 0.209*x - 1.288
0.859
Female H. oregonensis
9
y = 0.175*x + 0.037
0.724
Male H. oregonensis
14
y = 0.244*x - 0.688
0.534
Charifson 14
Table 2: Size and prey preference in H. nudus.
Size parameters of experimental Hemigrapsus nudus and number of prey consumed in 24 hour
intervals (three trials). Crabs Fe1, Fe3, and Ma3 did not feed on snails and were not testable (NT)
for preference.
Individual
Carapace
Width (mm)
Propal
Height (mm)
Number of
Lacuna vincta
consumed
Number of
Littorina scutulata
consumed
G
p-value
Fe1
23.6
5.9
0
0
NT
Fe2
23.1
5.5
15
0
20.8
p < 0.001
Fe3
20.6
5
0
0
NT
Ma1
19.1
5.4
14
0
19.4
p < 0.001
Ma2
18.4
5.4
15
0
20.8
p < 0.001
Ma3
20.1
6
0
0
NT
Charifson 15
Figure 1: Relationship of carapace width and propal height in Hemigrapsus. Line of best fit from
SMA regression. See Table 1A for descriptive statistics. A) Female H. nudus. B) Male H. nudus.
C) Female H. oregonensis. D) Male H. oregonensis.
Charifson 16
Figure 2: Relationship of carapace width and propal width in Hemigrapsus.
Line of best fit from SMA regression. See Table 1B for descriptive statistics. A) Female H.
nudus. B) Male H. nudus. C)Female H. oregonensis. D) Male H. oregonensis.
Charifson 17
Figure 3: Differences in propal height:carapace width ratio between sex and species.
The sex factor was statistically significant (F = 125.6. p < 0.001), while the species factor was
insignificant (F > 0.01, p = 0.983). There was a significant interaction (F = 4.39, p = 0.042).
Error bars represent standard error of the mean.
0
0.05
0.1
0.15
0.2
0.25
0.3
0.35
H. nudus
H. oregonensis
Propal Height:Carapace Width
Crab Species
Female
Male
Charifson 18
Figure 4: Differences in propal width:carapace width ratio between sex and species.
The sex factor was statistically significant (F1,45 = 103.8. p < 0.001), while the species factor was
insignificant (F1,45 = 0.09, p = 0.764). There was a significant interaction (F1,45 = 7.19, p =
0.01). Error bars represent standard error of the mean.
0
0.05
0.1
0.15
0.2
0.25
H. nudus
H. oregonensis
Propal Widtht:Carapace Width
Crab Species
Female
Male
Charifson 19
Figure 5: Consumption rates by individual H. nudus.
Mean consumption rates (n = 8 trials) of 3 female (Fe1 to Fe3) and 3 male (Ma1 to Ma3) H.
nudus. Crabs Fe1, Fe3, and Ma3 did not consume snails. The individuals that eat snails did not
differ in their consumption rates (F2,21 = 2.52, p = 0.104). Error bars represent standard error of
the mean.
0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1
Fe1
Fe2
Fe3
Ma1
Ma2
Ma3
Mean Consumption Rate
(Snails Consumed/Hour)
Individual H. nudus
# Read in some Data
# NOTE(review): this loads the original PDF, not the citation-stripped
# 'output.pdf' saved earlier in this file; confirm which one is intended.
loader = PyMuPDFLoader("hms/fhl_2014_Charifson_34622 (1).pdf")

# Now that we have our PDF document loaded into a loader object, we move on to text splitters
pages = loader.load_and_split()

# with open("hms/kr111tr5711.txt", encoding="utf8") as f:
#     text_file = f.read()
Tiktoken Splitter
tiktoken is a fast byte pair encoding (BPE) tokenizer created by OpenAI.
We can use it to estimate tokens used. It will probably be more accurate for the OpenAI models.
- How the text is split: by character passed in
- How the chunk size is measured: by tiktoken tokenizer
Models don’t see text like humans, instead they see a sequence of numbers (known as tokens). Byte pair encoding (BPE) is a way of converting text into tokens. It has a couple desirable properties:
- It’s reversible and lossless, so you can convert tokens back into the original text
- It works on arbitrary text, even text that is not in the tokeniser’s training data
- It compresses the text: the token sequence is shorter than the bytes corresponding to the original text. On average, in practice, each token corresponds to about 4 bytes.
- It attempts to let the model see common subwords. For instance, “ing” is a common subword in English, so BPE encodings will often split “encoding” into tokens like “encod” and “ing” (instead of e.g. “enc” and “oding”). Because the model will then see the “ing” token again and again in different contexts, it helps models generalise and better understand grammar.
AttributeError: 'str' object has no attribute 'page_content'
docs2 = tk_text_splitter.split_text(text_file) --> tk_text_splitter.create_documents(text_file)
# Load a tiktoken splitter directly
from langchain.text_splitter import TokenTextSplitter

# Chunks of 200 tokens with no overlap between consecutive chunks.
tk_text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=0)

docs2 = tk_text_splitter.split_documents(pages)

print(docs2[0])
page_content='Charifson 1 \n \nSnail Predation by Hemigrapsus nudus and Hemigrapsus oregonensis: Sex and Species \nDifferences in Chela Size. \n \nDavid Charifson1,2 \n \nMarine Invertebrate Zoology \nSummer 2014 \n \n \n \n \n1 Friday Harbor Laboratories, University of Washington, Friday Harbor, WA 98250 \n2 Department of Ecology and Evolution, Stony Brook University, Stony Brook, NY 11794-5245 \n \n \n \n \nContact information: \nDavid Charifson \nDepartment of Ecology and Evolution \nStony Brook University \n650 Life Sciences Building \nStony Brook, NY 11794-5245 \nDavid.charifson@stonybrook.edu \n \n \n \n \nKeywords: Hemig' metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 0, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}
# Initialize the OpenAI embeddings object used to build the vector store.
embeddings = OpenAIEmbeddings()
FAISS Vectorstore
Faiss is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.
To retrieve text, there are two search types you can choose:
search_type=“similarity” uses similarity search in the retriever object where it selects text chunk vectors that are most similar to the question vector.
search_type=“mmr” uses the maximum marginal relevance search where it optimizes for similarity to query AND diversity among selected documents.
In our case, we don't want too much diversity in the retrieved documents because we want the most accurate information, so we use similarity search. K = 3 asks the retriever for the 3 most relevant documents.
As you will see, there are some inconsistencies in K as some returned 1, 2, or more than 3 relevant documents.
# Index the chunks in FAISS and expose the index as a similarity retriever
# that is asked for the top 3 chunks per query. Note that despite its name,
# `vector_store` holds the retriever, not the FAISS store itself.
vector_store = FAISS.from_documents(docs2, embeddings).as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
Contextual compression
One challenge with retrieval is that usually you don’t know the specific queries your document storage system will face when you ingest data into the system. This means that the information most relevant to a query may be buried in a document with a lot of irrelevant text. Passing that full document through your application can lead to more expensive LLM calls and poorer responses.
Contextual compression is meant to fix this. The idea is simple: instead of immediately returning retrieved documents as-is, you can compress them using the context of the given query, so that only the relevant information is returned. “Compressing” here refers to both compressing the contents of an individual document and filtering out documents wholesale.
To use the Contextual Compression Retriever, you’ll need:
- a base Retriever
- a Document Compressor
The Contextual Compression Retriever passes queries to the base Retriever, takes the initial documents and passes them through the Document Compressor. The Document Compressor takes a list of Documents and shortens it by reducing the contents of Documents or dropping Documents altogether.
Adding contextual compression with an LLMChainExtractor
Now let’s wrap our base retriever with a ContextualCompressionRetriever. We’ll add an LLMChainExtractor, which will iterate over the initially returned documents and extract from each only the content that is relevant to the query.
# We wrap our base retriever with a ContextualCompressionRetriever and add an
# LLMChainExtractor, which will iterate over the initially returned documents
# and extract from each only the content that is relevant to the query.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vector_store,
)

# Retrieve and print the compressed documents for the first question.
compressed_docs = compression_retriever.get_relevant_documents("What is this paper about?")
pretty_print_docs(compressed_docs)
C:\Users\aclao89\AppData\Local\anaconda3\lib\site-packages\langchain\chains\llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.
warnings.warn(
Document 1:
The benthic red alga Prionitis lanceolata Harvey shows an increased primary productivity in the outfall canyon of the Carmel, California, marine sewage outfall. At a distance of twenty meters from the outfall this productivity shows a large reduction before again rising to a high value at approximately 20 to 4O meters. The differing productivities of samples from Mission Point, Monastery Beach, and Cabrillo Point (Hopkins Marine Station) are compared in relation to the currents which were found. Possible causes for these differences in productivity are diseussed.
----------------------------------------------------------------------------------------------------
Document 2:
This paper presents the results of primary productivity measurements of P. lanceolata samples collected near and away from the outfall. Some hypotheses are made with respect to the sewage's effect on the primary productivity. The "light and dark bottle" method of algal productivity measurement as used here, was found to not be especially applicable when applied to benthic marine algae. Therefore, the significance of the results should be accepted cautiously. Knowledge of the dispersion behavior of the sewage was important to both the collection of samples and the interpretation of experimental results; therefore, two group projects were completed during the term of study. First, a map of the immediate outfall area was constructed through the use of compass readings and measured lines. This map was marked in a grid with squares.
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Summarize the paper concisely with reference to materials and methods.")
pretty_print_docs(compressed_docs)
Document 1:
sewage outfalls.
This paper presents the results of primary
productivity measurements of P. lanceolata samples
collected near and away from the outfall. Some
hypotheses are made with»respect to the sewage's
effect on the primary productivity. The "light and
dark bottle" method of algal productivity measurement
as used here, was found to not be especially applicable
when applied to benthic marine algae. Therefore,
© the significance of the results should be accepted cautiously.
wD
© MATERIALS AND METHODS
Knowledge of the dispersion behavior of the
sewage was important to both the collection of
samples and the interpretation of experimental results;
therefore, two group projects were completed during
the term of study. First, a map of the immediate
outfall area was constructed through the use of
compass readings and measured lines. This map was
marked in a grid with squares
----------------------------------------------------------------------------------------------------
Document 2:
the dissolved 0, "fixed" (Strickland and Parsons,
1965), and a 50 ml volume removed from each bottle
© for the standard Winkler titration (Strickland and
Parsons, 1965). Two "initial bottles" containing
only the aerated sea water were fixed using the same
procedure and also titrated.
----------------------------------------------------------------------------------------------------
Document 3:
A map of the entire Carmel Bay area, partially represented in figures 3, 4, was obtained from the California State Department of Beaches and Parks.
Second, two current studies were undertaken using fluorescein dye and color coded, marked bottles.
The bottles were set from a small skiff in predetermined lines, and compass readings (figure 2, tables I, II) made at timed intervals from a suitable reference point.
The bottles' actions were then plotted using this information (figures 3, 4).
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Write a one sentence summary of the purpose of the paper")
pretty_print_docs(compressed_docs)
Document 1:
This paper presents the results of primary productivity measurements of P. lanceolata samples collected near and away from the outfall. Some hypotheses are made with respect to the sewage's effect on the primary productivity.
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?")
pretty_print_docs(compressed_docs)
Document 1:
This paper presents the results of primary productivity measurements of P. lanceolata samples collected near and away from the outfall.
----------------------------------------------------------------------------------------------------
Document 2:
Collection of samples
Prionitis lanceolata was collected, labeled
with the collection site sample number (eege, 50-b),
and placed in plastic bags for transport to the lab.
The location, time of collection, tidal conditions,
and a deseription of the algae and the collection
site were all noted on a collection sheet (appendix).
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?")
pretty_print_docs(compressed_docs)
Document 1:
This paper presents the results of primary productivity measurements of P. lanceolata samples collected near and away from the outfall.
----------------------------------------------------------------------------------------------------
Document 2:
The benthic red alga Prionitis lanceolata Harvey shows an increased primary productivity in the outfall canyon of the Carmel, California, marine sewage outfall. At a distance of twenty meters from the outfall this productivity shows a large reduction before again rising to a high value at approximately 20 to 4O meters. The differing productivities of samples from Mission Point, Monastery Beach, and Cabrillo Point (Hopkins Marine Station) are compared in relation to the currents which were found.
----------------------------------------------------------------------------------------------------
Document 3:
This alga has been found in very close proximity to the marine sewage outfalls of Carmel and Pacific Grove, and for this reason was chosen as an experimental tool of marine sewage pollution study.
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("What are the scientific names of the species mentioned in this paper?")
pretty_print_docs(compressed_docs)
Document 1:
Prionitis lanceolata Harvey is a species of benthic red algae which is widely distributed in the Monterey Bay, California, intertidal regions, generally occurring in the +1.0 to -1.5 foot tide levels.
----------------------------------------------------------------------------------------------------
Document 2:
Prionitis lanceolata Harvey
----------------------------------------------------------------------------------------------------
Document 3:
Prionitis lanceolata
----------------------------------------------------------------------------------------------------
Document 4:
sewage outfalls.
P. lanceolata samples
sewage's effect on the primary productivity.
benthic marine algae.
MATERIALS AND METHODS
dispersion behavior of the sewage
immediate outfall area
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Does the paper mention where the species were observed or collected, and if so, what locations are given?")
pretty_print_docs(compressed_docs)
Document 1:
The paper mentions the collection of samples and the location, time of collection, and a description of the algae and the collection site were all noted on a collection sheet.
----------------------------------------------------------------------------------------------------
Document 2:
This paper presents the results of primary productivity measurements of P. lanceolata samples collected near and away from the outfall.
----------------------------------------------------------------------------------------------------
Document 3:
The benthic red alga Prionitis lanceolata Harvey shows an increased primary productivity in the outfall canyon of the Carmel, California, marine sewage outfall. The differing productivities of samples from Mission Point, Monastery Beach, and Cabrillo Point (Hopkins Marine Station) are compared in relation to the currents which were found.
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Can you give a more specific location?")
pretty_print_docs(compressed_docs)
Document 1:
A map of the entire Carmel Bay area, partially represented in figures 3, 4, was obtained from the California State Department of Beaches and Parks.
----------------------------------------------------------------------------------------------------
Document 2:
The benthic red alga Prionitis lanceolata Harvey shows an increased primary productivity in the outfall canyon of the Carmel, California, marine sewage outfall. At a distance of twenty meters from the outfall this productivity shows a large reduction before again rising to a high value at approximately 20 to 4O meters. The differing productivities of samples from Mission Point, Monastery Beach, and Cabrillo Point (Hopkins Marine Station) are compared in relation to the currents which were found.
# Retrieve and print the compressed documents for the coordinates question.
compressed_docs = compression_retriever.get_relevant_documents("Are any coordinate locations given in latitude / longitude, and if so, what are they?")
pretty_print_docs(compressed_docs)

# Retrieve and print the compressed documents for the habitat question.
compressed_docs = compression_retriever.get_relevant_documents("In what habitat were the species found?")
pretty_print_docs(compressed_docs)
Document 1:
The benthic red alga Prionitis lanceolata Harvey shows an increased primary productivity in the outfall canyon of the Carmel, California, marine sewage outfall.
----------------------------------------------------------------------------------------------------
Document 2:
Monterey Bay, California, intertidal regions
----------------------------------------------------------------------------------------------------
Document 3:
sewage outfalls
----------------------------------------------------------------------------------------------------
Document 4:
Monastery Beach compared to those found for the algae at Mission Point to the north and the HMS control.
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Does the paper mention a year, date and/or time that species were collected or observed, and if so, what was mentioned?")
pretty_print_docs(compressed_docs)
Document 1:
The location, time of collection, tidal conditions, and a description of the algae and the collection site were all noted on a collection sheet (appendix).
# Retrieve and print the compressed documents for this question.
compressed_docs = compression_retriever.get_relevant_documents("Are there any maps, figures, tables or diagrams in the paper?")
pretty_print_docs(compressed_docs)
Document 1:
A gridded map (figure 1) greatly aided in location and identification of algal samples.
A map of the entire Carmel Bay area, partially represented in figures 3, 4, was obtained from the California State Department of Beaches and Parks.
Compass readings (figure 2, tables I, II) were made at timed intervals from a suitable reference point. The bottles' actions were then plotted using this information (figures 3, 4).
----------------------------------------------------------------------------------------------------
Document 2:
Figure 2. All readings in tables I and II are in
degrees east of North. Recovered
bottles are designated * .
Figure 3. The initial positions and end points of the
bottles in current study 1 are designated @.
Recovery points are indicated by LA °
Scale 1:10,000
Figure 4. Initial and end positions of the marked
bottles are indicated by MJ in current
study 2. Recovered bottles are marked A ps
@ Secale 1:10,000
, Figure 5. Currents in the area immediate to the
outfall are indicated by the heavy black
arrowse Note the trend for the currents to
move across the outfall to the south and
generally miss the southern end of the
rocky area. Scale 1:20
# The twelve questions put to each QA chain below. The names query..query12
# are referenced one-to-one by result..result12 (and the *a / *b variants).
query = "What is this paper about?"
query2 = "Summarize the paper concisely with reference to materials and methods."
query3 = "Write a one sentence summary of the purpose of the paper"
query4 = "Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?"
query5 = "Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?"
query6 = "What are the scientific names of the species mentioned in this paper?"
query7 = "Does the paper mention where the species were observed or collected, and if so, what locations are given?"
query8 = "Can you give a more specific location?"
query9 = "Are any coordinate locations given in latitude / longitude, and if so, what are they?"
query10 = "In what habitat were the species found?"
query11 = "Does the paper mention a year, date and/or time that species were collected or observed, and if so, what was mentioned?"
query12 = "Are there any maps, figures, tables or diagrams in the paper?"
Custom Prompts
You can pass in custom prompts to do question answering. These are the same prompts that you can pass into the base question answering chain.
# Build prompt
# The template must expose the {context} and {question} placeholders: the
# RetrievalQA "stuff" chain fills them with the retrieved text and the query.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible.
Avoid pulling context from the literature cited section starting on page 10
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
RetrievalQA w/o Contextual Compression & Prompting
The RetrievalQAChain is a chain that combines a Retriever and a QA chain (described above). It is used to retrieve documents from a Retriever and then use a QA chain to answer a question based on the retrieved documents.
Here we used “stuff” chain type which is the most straightforward of the document chains. It takes a list of documents, inserts them all into a prompt and passes that prompt to an LLM.
We didn't wrap our retriever with contextual compression here, so that we can compare the results with and without it.
# Build a "stuff" RetrievalQA chain on the plain (uncompressed) retriever,
# using the custom prompt.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')
qa_chain_stuff = RetrievalQA.from_chain_type(
    llm,
    retriever=vector_store,
    chain_type="stuff",
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Run the first question through the chain and display the answer text.
result = qa_chain_stuff({"query": query})
result["result"]
'This paper is about the mechanics of fiddler crab claws and the need for future studies to improve experimental design and methodology.'
# Run query2 through qa_chain_stuff and display the answer text.
result2 = qa_chain_stuff({"query": query2})
result2["result"]
'The paper discusses the need for future studies to address logistical issues and increase replication. It also suggests using multivariate and geometric morphometric methods to quantify claw shape instead of ratios. The acknowledgements section mentions individuals and organizations that provided advice and financial support for the study.'
# Run query3 through qa_chain_stuff and display the answer text.
result3 = qa_chain_stuff({"query": query3})
result3["result"]
'The purpose of the paper is to discuss the logistical issues and limitations of a previous study on fiddler crab claw shape and suggest improvements for future studies.'
# Run query4 through qa_chain_stuff and display the answer text.
result4 = qa_chain_stuff({"query": query4})
result4["result"]
"I don't know."
# Run query5 through qa_chain_stuff and display the answer text.
result5 = qa_chain_stuff({"query": query5})
result5["result"]
'This paper contains experimental research conducted with organisms collected in nature.'
# Run query6 through qa_chain_stuff and display the answer text.
result6 = qa_chain_stuff({"query": query6})
result6["result"]
'Hemigrapsus nudus, Hemigrapsus orogenensis, Littorina scutulata, Lacuna vincta.'
# Run query7 through qa_chain_stuff and display the answer text.
result7 = qa_chain_stuff({"query": query7})
result7["result"]
'The paper mentions that the study was conducted at Friday Harbor Laboratories, University of Washington, Friday Harbor, WA 98250.'
# Run query8 through qa_chain_stuff and display the answer text.
result8 = qa_chain_stuff({"query": query8})
result8["result"]
'No, the specific location is not provided in the given context.'
# Run query9 through qa_chain_stuff and display the answer text.
result9 = qa_chain_stuff({"query": query9})
result9["result"]
'No, there are no coordinate locations given in latitude/longitude.'
# Run query10 through qa_chain_stuff and display the answer text.
result10 = qa_chain_stuff({"query": query10})
result10["result"]
'The species were found in the rocky intertidal zone.'
# Run query11 through qa_chain_stuff and display the answer text.
result11 = qa_chain_stuff({"query": query11})
result11["result"]
'No, the paper does not mention a year, date, or time that species were collected or observed.'
# Run query12 through qa_chain_stuff and display the answer text.
result12 = qa_chain_stuff({"query": query12})
result12["result"]
'Yes, there is a figure in the paper.'
RetrievalQA with Contextual Compression
# Wrap the base retriever with an LLM-driven extractor so only query-relevant
# passages from each retrieved chunk are handed to the QA chain.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vector_store,
)

# Same "stuff" chain and prompt as before, now fed by the compression retriever.
qa_chain_stuff_compressed = RetrievalQA.from_chain_type(
    llm,
    retriever=compression_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Run the first question through the chain and display the answer text.
result1a = qa_chain_stuff_compressed({"query": query})
result1a["result"]
'This paper is about the differences in chela size between Hemigrapsus nudus and Hemigrapsus oregonensis and how it relates to snail predation.'
# Run query2 through qa_chain_stuff_compressed and display the answer text.
result2a = qa_chain_stuff_compressed({"query": query2})
result2a["result"]
'The paper discusses the limitations of a previous study due to logistical issues and lack of replication. It suggests that future studies should include more crabs, exclude non-feeding crabs, and have longer trial periods. Additionally, it recommends using multivariate and geometric morphometric methods to quantify claw shape instead of ratios.'
# Run query3 through qa_chain_stuff_compressed and display the answer text.
result3a = qa_chain_stuff_compressed({"query": query3})
result3a["result"]
'The purpose of the paper is to address the limitations of a previous study and propose improvements for future studies on crab feeding behavior and claw shape.'
# Run query4 through qa_chain_stuff_compressed and display the answer text.
result4a = qa_chain_stuff_compressed({"query": query4})
result4a["result"]
'Yes, the paper includes one or more observations.'
# Run query5 through qa_chain_stuff_compressed and display the answer text.
result5a = qa_chain_stuff_compressed({"query": query5})
result5a["result"]
'This paper contains both observational and experimental research conducted in the natural environment or with organisms collected in nature.'
# Run query6 through qa_chain_stuff_compressed and display the answer text.
result6a = qa_chain_stuff_compressed({"query": query6})
result6a["result"]
'Hemigrapsus nudus and Hemigrapsus oregonensis.'
# Run query7 through qa_chain_stuff_compressed and display the answer text.
result7a = qa_chain_stuff_compressed({"query": query7})
result7a["result"]
'The paper mentions that the species were observed and collected in Elkhorn Slough, Monterey County, California.'
# Run query8 through qa_chain_stuff_compressed and display the answer text.
result8a = qa_chain_stuff_compressed({"query": query8})
result8a["result"]
"I don't know."
# Run query9 through qa_chain_stuff_compressed and display the answer text.
result9a = qa_chain_stuff_compressed({"query": query9})
result9a["result"]
'No, there are no coordinate locations given in latitude/longitude.'
# Run query10 through qa_chain_stuff_compressed and display the answer text.
result10a = qa_chain_stuff_compressed({"query": query10})
result10a['result']
'The species Lacuna vincta were found in False Bay and Dead Man Cove. The species Littorina scutulata were found in the rocky intertidal zone of Fourth of July Beach.'
# Run query11 through qa_chain_stuff_compressed and display the answer text.
result11a = qa_chain_stuff_compressed({"query": query11})
result11a["result"]
'The paper does not mention a year, date, or time that species were collected or observed.'
# Run query12 through qa_chain_stuff_compressed and display the answer text.
result12a = qa_chain_stuff_compressed({"query": query12})
result12a["result"]
'Yes, there are figures (Figure 1 and Figure 2) and tables (Table 1A and Table 1B) in the paper.'
RetrievalQA (Stuff) w/ Contextual Compression & Source Document
Additionally, we can return the source documents used to answer the question by specifying an optional parameter when constructing the chain.
# Same compressed "stuff" chain, but with return_source_documents=True so each
# result also carries the documents the answer was built from.
qa_chain_stuff_compressed_source = RetrievalQA.from_chain_type(
    llm,
    retriever=compression_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

# Run the first question through the chain and display the answer text.
result1b = qa_chain_stuff_compressed_source({"query": query})
result1b["result"]
'This paper is about the variation in the mechanics of fiddler crab claws and the need for future studies to include more crabs, exclude non-feeding crabs, and use multivariate and geometric morphometric methods to quantify claw shape.'
# Display the source documents returned alongside the answer.
result1b["source_documents"]
[Document(page_content='Evolutionary variation in the mechanics of fiddler crab claws.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 11, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='due to logistical issues surrounding the experimental design and little replication. Future studies should include more crabs, the exclusion of non-feeding crabs, and trial periods with longer time intervals. Additionally, any subsequent study should utilize multivariate and geometric morphometric methods to quantify claw shape instead of using ratios, as two similar ratios may have different shape.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 9, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Snail Predation by Hemigrapsus nudus and Hemigrapsus oregonensis: Sex and Species \nDifferences in Chela Size.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 0, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query2 through qa_chain_stuff_compressed_source and display the answer text.
result2b = qa_chain_stuff_compressed_source({"query": query2})
result2b["result"]
'The paper discusses the limitations of the experimental design and suggests improvements for future studies, such as increasing the number of crabs and excluding non-feeding individuals. It also recommends using multivariate and geometric morphometric methods to quantify claw shape instead of ratios. The paper references Table 1B for descriptive statistics.'
# Display the source documents returned alongside the answer.
result2b["source_documents"]
[Document(page_content='due to logistical issues surrounding the experimental design and little replication. Future studies should include more crabs, the exclusion of non-feeding crabs, and trial periods with longer time intervals. Additionally, any subsequent study should utilize multivariate and geometric morphometric methods to quantify claw shape instead of using ratios, as two similar ratios may have different shape.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 9, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Charifson 12', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 11, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Table 1B for descriptive statistics.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 15, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query3 through qa_chain_stuff_compressed_source and display the answer text.
result3b = qa_chain_stuff_compressed_source({"query": query3})
result3b["result"]
'The purpose of the paper is to identify the factors that should be considered in future studies on crab feeding behavior and claw shape.'
# Display the source documents returned alongside the answer.
result3b["source_documents"]
[Document(page_content='due to logistical issues surrounding the experimental design and little replication. Future studies should include more crabs, the exclusion of non-feeding crabs, and trial periods with longer time intervals. Additionally, any subsequent study should utilize multivariate and geometric morphometric methods to quantify claw shape instead of using ratios, as two similar ratios may have different shape.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 9, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query4 through qa_chain_stuff_compressed_source and display the answer text.
result4b = qa_chain_stuff_compressed_source({"query": query4})
result4b["result"]
"I don't know."
# Display the source documents returned alongside the answer.
result4b["source_documents"]
[Document(page_content='Friday Harbor Laboratories, The Libbie Hyman Scholarship, Society for Integrative and Comparative Biology, and the Stony Brook Department of Ecology and Evolution provided financial support and have my gratitude.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 9, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='O’Halloran, M. J. and O’Dor, R. K. 1988. Molt cycle of male snow crabs, Chionocetes opilio,\n \nfrom observations of external features, setal changes, and feeding behavior. Journal of\n \nCrustacean Biology 8: 164-176.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 10, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query5 through qa_chain_stuff_compressed_source and display the answer text.
result5b = qa_chain_stuff_compressed_source({"query": query5})
result5b["result"]
'This paper contains both observational and experimental research conducted in the natural environment or with organisms collected in nature.'
# Display the source documents returned alongside the answer.
result5b['source_documents']
[Document(page_content='Friday Harbor Laboratories, The Libbie Hyman Scholarship, Society for Integrative and Comparative Biology, and the Stony Brook Department of Ecology and Evolution provided financial support and have my gratitude.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 9, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Behrens Yamada, S. and Boulding, E. 1996. The role of highly mobile crab predators in the intertidal zonation of their gastropod prey. Journal of Experimental Marine Biology and Ecology 204: 59-83.\nBehrens Yamada, S. and Boulding, E. 1998. Claw morphology, prey size selection and foraging efficiency in generalist and specialist shell breaking crabs. Journal of Experimental Marine Biology and Ecology 220: 191-211.\nKeppel, E. and Scrosati, R. 2004. Chemically mediated avoidance of Hemigrapsus nudus (Crustacea) by Littorina scutulata (Gastropoda): effects of species coexistence and variable cues.\nO’Halloran, M. J. and O’Dor, R. K. 1988. Molt cycle of male snow crabs, Chionocetes opilio, from observations of external features, setal changes, and feeding behavior.\nSliger, M. C. 1982. Distribution and microhabitat selection of Hemigrapsus oregonensis (Dana) and Pachygrapsus crassipes (Randall) in Elkhorn Slough, Monterey County, California. M.S. thesis, California State University, Hayward. 75 pgs.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 10, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query6 through qa_chain_stuff_compressed_source (defined outside this
# section) and display the answer text.
result6b = qa_chain_stuff_compressed_source({"query": query6})
result6b['result']
'Hemigrapsus nudus, Hemigrapsus oregonensis.'
# Display the source documents returned alongside the query6 answer.
result6b['source_documents']
[Document(page_content='BMC Evolutionary Biology 13: 137.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 11, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Hemigrapsus nudus, Hemigrapsus oregonensis', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 0, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Behrens Yamada, S. and Boulding, E. 1996. The role of highly mobile crab predators in the intertidal zonation of their gastropod prey. Journal of Experimental Marine Biology and Ecology 204: 59-83. \nBehrens Yamada, S. and Boulding, E. 1998. Claw morphology, prey size selection and foraging efficiency in generalist and specialist shell breaking crabs. Journal of Experimental Marine Biology and Ecology 220: 191-211. \nKeppel, E. and Scrosati, R. 2004. Chemically mediated avoidance of Hemigrapsus nudus (Crustacea) by Littorina scutulata (Gastropoda): effects of species coexistence and variable cues. Animal Behavior 68: 915-920. \nKozloff, E. N. 1987. Marine invertebrates of the Pacific Northwest. University of Washington Press, Seattle, Washington, USA. \nO’Halloran, M. J. and O’Dor, R. K. 1988. Molt cycle of male snow crabs, Chionocetes opilio, from observations of external features, setal changes, and feeding behavior. Journal of Crustacean Biology 8: 164-176. \nSliger, M. C. 1982. Distribution and microhabitat selection of Hemigrapsus oregonensis (Dana) and Pachygrapsus crassipes (Randall) in Elkhorn Slough, Monterey County, California. M.S. thesis, California State University, Hayward. 75 pgs. \nSokol, R. R. and Rohlf, F. J. 2011. Biometry. 4th ed. W. H. Freeman, New York, New York, USA. \nStuart, Y. E. and Losos, J. B. 2013. Ecological character displacement: glass half full or half empty? Trends in Ecology and Evolution 28: 402-408.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 10, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query7 through qa_chain_stuff_compressed_source and display the answer text.
result7b = qa_chain_stuff_compressed_source({"query": query7})
result7b["result"]
'The paper mentions that the species Hemigrapsus oregonensis and Pachygrapsus crassipes were observed and collected in Elkhorn Slough, Monterey County, California.'
# Display the source documents returned alongside the query7 answer.
result7b["source_documents"]
[Document(page_content='Friday Harbor Laboratories', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 9, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Sliger, M. C. 1982. Distribution and microhabitat selection of Hemigrapsus oregonensis (Dana)\n \nand Pachygrapsus crassipes (Randall) in Elkhorn Slough, Monterey County, California.\n \nM.S. thesis, California State University, Hayward. 75 pgs.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 10, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query8 through qa_chain_stuff_compressed_source and display the answer text.
result8b = qa_chain_stuff_compressed_source({"query": query8})
result8b["result"]
'The study was conducted in a rural area in northern California.'
# Display the source documents returned alongside the query8 answer.
result8b["source_documents"]
[]
# Run query9 through qa_chain_stuff_compressed_source and display the answer text.
result9b = qa_chain_stuff_compressed_source({"query": query9})
result9b["result"]
'No, there are no coordinate locations given in latitude/longitude in the provided context.'
# Display the source documents returned alongside the query9 answer.
result9b["source_documents"]
[]
# Run query10 through qa_chain_stuff_compressed_source and display the answer text.
result10b = qa_chain_stuff_compressed_source({"query": query10})
result10b["result"]
'The species Lacuna vincta were found in False Bay and Dead Man Cove. The species Littorina scutulata were found in the rocky intertidal zone of Fourth of July Beach.'
# Display the source documents returned alongside the query10 answer.
result10b["source_documents"]
[Document(page_content='All specimens were collected on San Juan Island, Washington. Lacuna vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and Dead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were collected from the rocky intertidal zone of Fourth of July Beach.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 3, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
# Run query11 through qa_chain_stuff_compressed_source and display the answer text.
result11b = qa_chain_stuff_compressed_source({"query": query11})
result11b["result"]
'The paper does not mention a year, date, or time that species were collected or observed.'
# Display the source documents returned alongside the query11 answer.
result11b["source_documents"]
[]
# Run query12 through qa_chain_stuff_compressed_source and display the answer text.
result12b = qa_chain_stuff_compressed_source({"query": query12})
result12b["result"]
'Yes, there are figures in the paper.'
# Display the source documents returned alongside the query12 answer.
result12b["source_documents"]
[Document(page_content='Figure 1: Relationship of carapace width and propal height in Hemigrapsus. Line of best fit from \nSMA regression. See Table 1A for descriptive statistics. A) Female H. nudus. B) Male H. nudus. \nC) Female H. oregonensis. D) Male H. oregonensis.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 14, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''}),
Document(page_content='Figure 2: Relationship of carapace width and propal width in Hemigrapsus. \nLine of best fit from SMA regression. See Table 1B for descriptive statistics. A) Female H. \nnudus. B) Male H. nudus. C)Female H. oregonensis. D) Male H. oregonensis.', metadata={'source': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'file_path': 'hms/fhl_2014_Charifson_34622 (1).pdf', 'page': 15, 'total_pages': 19, 'format': 'PDF 1.5', 'title': '', 'author': 'David', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Office Word 2007', 'producer': 'Microsoft® Office Word 2007', 'creationDate': "D:20140723120649-04'00'", 'modDate': "D:20140723120649-04'00'", 'trapped': ''})]
ConversationalRetrievalChain + Chat History
The ConversationalRetrievalChain (called ConversationalRetrievalQA in some LangChain documentation) builds on the RetrievalQA chain to provide a chat history component.
ConversationalRetrievalChain = conversation memory + RetrievalQAChain
It first combines the chat history (either explicitly passed in or retrieved from the provided memory) and the question into a standalone question, then looks up relevant documents from the retriever, and finally passes those documents and the question to a question answering chain to return a response.
# Build a FAISS index over docs2 and expose it as a similarity retriever
# returning the top 3 chunks. Despite its name, vector_store2 holds the
# retriever returned by as_retriever(), not the FAISS store itself.
vector_store2 = FAISS.from_documents(docs2, embeddings).as_retriever(
    search_type="similarity", search_kwargs={"k": 3}
)

# Wrap the base retriever so every retrieved chunk is passed through an
# LLM-based extractor before being handed to the QA chain.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever2 = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=vector_store2
)
What is memory?
Definition: Memory is an agent’s capacity to remember previous interactions with the user (think chatbots).
The official definition of memory is the following:
By default, Chains and Agents are stateless, meaning that they treat each incoming query independently. In some applications (chatbots being a GREAT example) it is highly important to remember previous interactions, both at a short term but also at a long term level. The concept of “Memory” exists to do exactly that.
The ConversationBufferMemory is the most straightforward conversational memory in LangChain: the past conversation between the human and the AI is passed, unmodified, to the prompt's history variable ({history} by default; in this notebook the memory key is set to "chat_history").
# We can now create a memory object, which is necessary to track the
# inputs/outputs and hold a conversation.
from langchain.memory import ConversationBufferMemory

# input_key/output_key name the chain fields recorded on each turn;
# memory_key is the name under which the stored history is exposed, and
# return_messages=True keeps it as message objects instead of one string.
# NOTE(review): ConversationBufferMemory stores the raw history, so the
# `llm` argument is presumably unused here -- confirm and drop if so.
memory = ConversationBufferMemory(
    llm=llm,
    input_key='question',
    output_key='answer',
    memory_key="chat_history",
    return_messages=True,
)
# Initialize the ConversationalRetrievalChain: a chat model, the
# compression retriever as the document source, and the conversation
# memory so earlier turns are available to follow-up questions.
qa_chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(),
    compression_retriever2,
    memory=memory,
)
# Initialize empty list to append chat history
# NOTE(review): nothing in this section appends to this list, yet the
# "chat_history" shown in the results grows turn by turn -- presumably it is
# supplied by `memory`; confirm whether the explicit list is still needed.
chat_history = []
result1c = qa_chain({"question": query, "chat_history": chat_history})
C:\Users\aclao89\AppData\Local\anaconda3\lib\site-packages\langchain\chains\llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.
warnings.warn(
# "What is this paper about?"
result1c["answer"]
'This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.'
# Display the conversation recorded so far (question and answer messages).
result1c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False)]
# Follow-up question asked on the same chain, then display its answer.
query1c = "What were some key differences in chela size between sex and species?"
result1ca = qa_chain({"question": query1c, "chat_history": chat_history})
# "What were some key differences in chela size between sex and species?"
result1ca["answer"]
'The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.'
# Display the conversation recorded so far.
result1ca["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False)]
# Ask query2 on the conversational chain and display the answer.
result2c = qa_chain({"question": query2, "chat_history": chat_history})
# "Summarize the paper concisely with reference to materials and methods."
result2c["answer"]
"I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used."
# Display the conversation recorded so far.
result2c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False)]
# Ask query3 on the conversational chain and display the answer.
result3c = qa_chain({"question": query3, "chat_history": chat_history})
# "Write a one sentence summary of the purpose of the paper"
result3c["answer"]
'The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.'
# Display the conversation recorded so far.
result3c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.', additional_kwargs={}, example=False)]
# Ask query4 on the conversational chain and display the answer.
result4c = qa_chain({"question": query4, "chat_history": chat_history})
# "Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?"
result4c["answer"]
'Based on the given context, it is not mentioned whether the paper includes one or more observations.'
# Display the conversation recorded so far.
result4c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.', additional_kwargs={}, example=False),
HumanMessage(content='Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not mentioned whether the paper includes one or more observations.', additional_kwargs={}, example=False)]
# Ask query5 on the conversational chain and display the answer.
# NOTE(review): the recorded chat history contains this question twice, so
# this cell was apparently executed twice against the same memory.
result5c = qa_chain({"question": query5, "chat_history": chat_history})
# "Does this paper contain observational or experimental research conducted in the natural environment
# or with organisms collected in nature?"
result5c["answer"]
"I don't know, as the provided context does not specify the content of the paper or the type of research conducted."
# Display the conversation recorded so far.
result5c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.', additional_kwargs={}, example=False),
HumanMessage(content='Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not mentioned whether the paper includes one or more observations.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not possible to determine whether the paper involved observational or experimental research conducted in the natural environment or with organisms collected in nature.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content="I don't know, as the provided context does not specify the content of the paper or the type of research conducted.", additional_kwargs={}, example=False)]
# Ask query6 on the conversational chain and display the answer.
result6c = qa_chain({"question": query6, "chat_history": chat_history})
# "What are the scientific names of the collected species mentioned in this paper?"
result6c["answer"]
'The scientific names mentioned in the paper are:\n\n- rapsus nudus: This is not a recognized scientific name. It may be a typographical error or a species name not widely recognized in the scientific community.\n\n- Hemigrapsus orogenensis: This is a recognized scientific name for a species of crab.\n\n- Littorina scutulata: This is a recognized scientific name for a species of periwinkle snail.\n\n- Lacuna vincta: This is a recognized scientific name for a species of sea snail.'
# Display the conversation recorded so far.
result6c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.', additional_kwargs={}, example=False),
HumanMessage(content='Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not mentioned whether the paper includes one or more observations.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not possible to determine whether the paper involved observational or experimental research conducted in the natural environment or with organisms collected in nature.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content="I don't know, as the provided context does not specify the content of the paper or the type of research conducted.", additional_kwargs={}, example=False),
HumanMessage(content='What are the scientific names of the collected species mentioned in this paper?', additional_kwargs={}, example=False),
AIMessage(content='The scientific names mentioned in the paper are:\n\n- rapsus nudus: This is not a recognized scientific name. It may be a typographical error or a species name not widely recognized in the scientific community.\n\n- Hemigrapsus orogenensis: This is a recognized scientific name for a species of crab.\n\n- Littorina scutulata: This is a recognized scientific name for a species of periwinkle snail.\n\n- Lacuna vincta: This is a recognized scientific name for a species of sea snail.', additional_kwargs={}, example=False)]
# Ask query7 on the conversational chain and display the answer.
result7c = qa_chain({"question": query7, "chat_history": chat_history})
# "Please list the species collected based on their scientific names along with location of collection."
# NOTE(review): the chat history recorded for this turn shows a different
# question ("Does the paper mention where the species were observed or
# collected, and if so, what locations are given?") -- the comment above may
# be stale; confirm against the definition of query7.
result7c["answer"]
'Yes, the paper mentions the location "khorn Slough, Monterey County, California" where the species were observed or collected.'
# Display the conversation recorded so far.
result7c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.', additional_kwargs={}, example=False),
HumanMessage(content='Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not mentioned whether the paper includes one or more observations.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not possible to determine whether the paper involved observational or experimental research conducted in the natural environment or with organisms collected in nature.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content="I don't know, as the provided context does not specify the content of the paper or the type of research conducted.", additional_kwargs={}, example=False),
HumanMessage(content='What are the scientific names of the collected species mentioned in this paper?', additional_kwargs={}, example=False),
AIMessage(content='The scientific names mentioned in the paper are:\n\n- rapsus nudus: This is not a recognized scientific name. It may be a typographical error or a species name not widely recognized in the scientific community.\n\n- Hemigrapsus orogenensis: This is a recognized scientific name for a species of crab.\n\n- Littorina scutulata: This is a recognized scientific name for a species of periwinkle snail.\n\n- Lacuna vincta: This is a recognized scientific name for a species of sea snail.', additional_kwargs={}, example=False),
HumanMessage(content='Does the paper mention where the species were observed or collected, and if so, what locations are given?', additional_kwargs={}, example=False),
AIMessage(content='Yes, the paper mentions the location "khorn Slough, Monterey County, California" where the species were observed or collected.', additional_kwargs={}, example=False)]
# Ask query8 on the conversational chain and display the answer.
result8c = qa_chain({"question": query8, "chat_history": chat_history})
# "Can you give a more specific location?"
result8c["answer"]
'No, the paper does not provide a more specific location for where the species were observed or collected.'
# Display the conversation history returned alongside the answer.
result8c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.', additional_kwargs={}, example=False),
HumanMessage(content='Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not mentioned whether the paper includes one or more observations.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not possible to determine whether the paper involved observational or experimental research conducted in the natural environment or with organisms collected in nature.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content="I don't know, as the provided context does not specify the content of the paper or the type of research conducted.", additional_kwargs={}, example=False),
HumanMessage(content='What are the scientific names of the collected species mentioned in this paper?', additional_kwargs={}, example=False),
AIMessage(content='The scientific names mentioned in the paper are:\n\n- rapsus nudus: This is not a recognized scientific name. It may be a typographical error or a species name not widely recognized in the scientific community.\n\n- Hemigrapsus orogenensis: This is a recognized scientific name for a species of crab.\n\n- Littorina scutulata: This is a recognized scientific name for a species of periwinkle snail.\n\n- Lacuna vincta: This is a recognized scientific name for a species of sea snail.', additional_kwargs={}, example=False),
HumanMessage(content='Does the paper mention where the species were observed or collected, and if so, what locations are given?', additional_kwargs={}, example=False),
AIMessage(content='Yes, the paper mentions the location "khorn Slough, Monterey County, California" where the species were observed or collected.', additional_kwargs={}, example=False),
HumanMessage(content='Can you give a more specific location?', additional_kwargs={}, example=False),
AIMessage(content='No, the paper does not provide a more specific location for where the species were observed or collected.', additional_kwargs={}, example=False)]
# Ask the question held in `query9` and display the model's answer.
result9c = qa_chain({"question": query9, "chat_history": chat_history})
# "Are any coordinate locations given in latitude / longitude, and if so, what are they?"
result9c["answer"]
"I'm sorry, but I don't have access to the specific paper or any information about it. Therefore, I cannot provide you with the coordinate locations given in latitude/longitude."
# Display the conversation history returned alongside the answer.
result9c["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about evolutionary variation in the mechanics of fiddler crab claws. It likely explores how different species or populations of fiddler crabs have developed unique claw structures and functions over time through the process of evolution.', additional_kwargs={}, example=False),
HumanMessage(content='What were some key differences in chela size between sex and species?', additional_kwargs={}, example=False),
AIMessage(content='The study found that there were statistically significant differences in chela size between sexes and species. In Hemigrapsus nudus, female crabs had a high variance in chela size parameters, with carapace width explaining a high percentage of the variance in propal height and propal width. In Hemigrapsus oregonensis, male crabs had the greatest variation in chela size parameters. Overall, carapace width was found to have a greater ability to explain the variance in chela size parameters in females of both crab species.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the full paper and cannot provide a concise summary or details about the materials and methods used.", additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='The purpose of the paper was to address the limitations of a previous study and suggest improvements for future studies on crabs, including increasing the number of crabs, excluding non-feeding crabs, and extending the trial periods.', additional_kwargs={}, example=False),
HumanMessage(content='Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not mentioned whether the paper includes one or more observations.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content='Based on the given context, it is not possible to determine whether the paper involved observational or experimental research conducted in the natural environment or with organisms collected in nature.', additional_kwargs={}, example=False),
HumanMessage(content='Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?', additional_kwargs={}, example=False),
AIMessage(content="I don't know, as the provided context does not specify the content of the paper or the type of research conducted.", additional_kwargs={}, example=False),
HumanMessage(content='What are the scientific names of the collected species mentioned in this paper?', additional_kwargs={}, example=False),
AIMessage(content='The scientific names mentioned in the paper are:\n\n- rapsus nudus: This is not a recognized scientific name. It may be a typographical error or a species name not widely recognized in the scientific community.\n\n- Hemigrapsus orogenensis: This is a recognized scientific name for a species of crab.\n\n- Littorina scutulata: This is a recognized scientific name for a species of periwinkle snail.\n\n- Lacuna vincta: This is a recognized scientific name for a species of sea snail.', additional_kwargs={}, example=False),
HumanMessage(content='Does the paper mention where the species were observed or collected, and if so, what locations are given?', additional_kwargs={}, example=False),
AIMessage(content='Yes, the paper mentions the location "khorn Slough, Monterey County, California" where the species were observed or collected.', additional_kwargs={}, example=False),
HumanMessage(content='Can you give a more specific location?', additional_kwargs={}, example=False),
AIMessage(content='No, the paper does not provide a more specific location for where the species were observed or collected.', additional_kwargs={}, example=False),
HumanMessage(content='Are any coordinate locations given in latitude / longitude, and if so, what are they?', additional_kwargs={}, example=False),
AIMessage(content="I'm sorry, but I don't have access to the specific paper or any information about it. Therefore, I cannot provide you with the coordinate locations given in latitude/longitude.", additional_kwargs={}, example=False)]
# Ask the question held in `query10` and display the model's answer.
result10c = qa_chain({"question": query10, "chat_history": chat_history})
# "Can you give a more specific location of the habitat such as formal name of said location?"
result10c["answer"]
'Lacuna vincta was found at False Bay and Dead Man Cove, while Littorina scutulata was found at Fourth of July Beach.'
# Ask the question held in `query11` and display the model's answer.
result11c = qa_chain({"question": query11, "chat_history": chat_history})
# "Does the paper mention a year, date and/or time that species were collected or observed,
# and if so, what was mentioned?"
result11c["answer"]
'The given context does not provide any information regarding the collection or observation of the species, such as the year, date, or time.'
# Ask the question held in `query12` and display the model's answer.
result12c = qa_chain({"question": query12, "chat_history": chat_history})
# "Are there any maps, figures, tables or diagrams in the paper?"
result12c["answer"]
'Yes, the paper includes two figures: Figure 2 shows the relationship of carapace width and propal width in Hemigrapsus, and Figure 1 shows the relationship of carapace width and propal height in Hemigrapsus. The figures include lines of best fit from SMA regression. The paper also mentions Table 1A and Table 1B, which likely contain descriptive statistics related to the figures.'
ConversationalRetrievalChain Similarity Search with ConversationBufferMemory
# Split the cleaned PDF text into token-based chunks (800 tokens, 50 overlap).
tk_text_splitter = TokenTextSplitter(chunk_size=800, chunk_overlap=50)
docs3 = tk_text_splitter.split_text(new_pdf_text)

# Index the chunks in FAISS and expose the index as a top-3 similarity retriever.
# NOTE: despite its name, `vector_store2` holds the retriever, not the FAISS store.
vector_store2 = FAISS.from_texts(docs3, embeddings).as_retriever(
    search_type="similarity", search_kwargs={"k": 3}
)

# Wrap the base retriever with an LLM-backed extractor compressor.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever2 = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=vector_store2
)
# Prompt text used to condense the chat history plus a follow-up question.
# Placeholders: {chat_history} and {question}. The doubled braces in the
# example outputs are escapes that render as literal "{" / "}".
# NOTE(review): the trailing "inputVariables: [...]" line is plain text inside
# the prompt (it is sent to the model verbatim) and mentions "context", which
# is not a placeholder of this template -- confirm whether it is intended.
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. Please use a maximum of 4 sentences. If you do not know the answer reply with 'I am sorry'.
1. SPECIES NAME: The scientific name (species name) of any plant, animal, fungus, alga or bacterium consists of two Latinized words. The first word is the name of the genus to which the organism belongs. The second word is the specific epithet or specific term of the species. Together, the genus plus the specific epithet make up the species name. The species name and scientific name are synonyms.
2. HABITAT: A species habitat can be seen as the physical manifestation of its ecological niche.
3. LOC: Name of any geographic location, like cities, countries, continents, districts etc.
Examples:
1. Sentence: Strongylocentrotus fransiscanus and S. purpuratus were obtained from the subtidal and intertidal regions, respectively, of Monterey Bay.
"Output: {{'SPECIES NAME': ['Strongylocentrotus fransiscanus', 'S. purpuratus'], 'HABITAT': ['subtidal', 'intertidal'], 'LOC': ['Monterey Bay']}}
2. Sentence: Cucumaria curata and C. pseudocurata live and feed in the hydrodynamically stressful environment of exposed intertidal areas.
Output: {{'SPECIES NAME': ['Cucumaria curata', 'C. pseudocurata'], 'HABITAT': ['exposed intertidal'], 'LOC': ['None']}}\n"
Chat History:
{chat_history}
Follow Up Input: {question}
inputVariables: ["context", "question", "chat_history"]
"""
# Build the prompt object from the template text; the placeholders
# ({chat_history}, {question}) are inferred from the string itself.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
# We can now create a memory object, which is necessary to track the
# inputs/outputs and hold a conversation.
from langchain.memory import ConversationBufferMemory

# input_key / output_key name the chain fields that get stored in memory;
# the history is exposed to the chain under "chat_history" as message objects.
# NOTE(review): `llm` is passed here as in the original cell; confirm that
# ConversationBufferMemory actually uses it.
memory = ConversationBufferMemory(
    llm=llm,
    input_key='question',
    output_key='answer',
    memory_key="chat_history",
    return_messages=True,
)
# We now initialize the ConversationalRetrievalChain
# It uses the compression retriever for context, the buffer memory for
# history, the custom prompt for condensing follow-up questions, and returns
# the retrieved source documents alongside the answer.
qa_chain_2 = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(),
    compression_retriever2,
    memory=memory,
    return_source_documents=True,
    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
)
# Questions posed to the conversational chain, in the order they are asked.
query = "What is this paper about?"
query1a = "What were the significant findings on the relationship mentioned?"
query2 = "Summarize the paper concisely with reference to materials and methods."
query3 = "Write a one sentence summary of the purpose of the paper"
query4 = "Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?"
query5 = "Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?"
query6 = "What are the scientific names of the collected species mentioned in this paper?"
query6a = "Were there other species collected?"
query7 = "Does the paper mention where the species were observed or collected, and if so, what locations are given?"
query7a = "Please list the species collected based on their scientific names along with location of collection."
query8 = "Can you give a more specific location?"
query9 = "Are any coordinate locations given in latitude / longitude, and if so, what are they?"
query10 = "In what habitat were the species found?"
query10a = "Can you give a more specific location of the habitat such as formal name of said location?"
query11 = "Does the paper mention a year, date and/or time that species were collected or observed, and if so, what was mentioned?"
query12 = "Are there any maps, figures, tables or diagrams in the paper?"

# Start from an empty history; it is passed explicitly with every chain call.
chat_history = []
# First turn of the memory-backed chain: ask `query` and display the answer.
result1a = qa_chain_2({"question": query, "chat_history": chat_history})
#"What is this paper about?"
result1a["answer"]
'This paper is about the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The study investigates the relationship between body size and propus size in these crabs, as well as the feeding preference for different types of snails. The paper also compares the chela size (claw size) between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.'
# Display the retrieved documents returned with the answer.
result1a["source_documents"]
[Document(page_content='Mean Consumption Rate \n(Snails Consumed/Hour) \nIndividual H. nudus', metadata={}),
Document(page_content='A few interesting patterns emerged in the morphometric analysis of Hemigrapsus nudus and Hemigrapsus orogenensis. The variance in propal height or propal width explained by carapace width was surprisingly high in female H. nudus with R2 values of 0.976 found for propal height and 0.927 for propal width. Male H. oregenensis had the greatest variation in chela size parameters, demonstrated by R2 of 0.693 and 0.534 for propal height and propal width respectively. The R2 values for propal width regressed against carapace width were consistently lower than propal height regressed against carapace width across species and sexes (Table 1). In general the ability of carapace width to explain the variance in chela size parameters (i.', metadata={}),
Document(page_content='The relationship between propus size and use of snails as prey in sympatric populations \nof Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Propal height and propal \nwidth increases with body size in a predictable manner in male and female crabs of both species. \nSexual dimorphism of propal height and width relationships in both shore crabs is apparent, but \nthere was no detectible difference in chela size between H. nudus and H. oregonensis. The lack \nof difference in chela size between sympatric H. nudus and H. oregonensis suggests that there is \nno character displacement in this trait. Both species of crabs showed a strong feeding preference \nfor the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata, likely \ndue to the ease of consumption. There were no differences in snail consumption rates between \nmale and female H. nudus of similar propal heights, mostly due to high feeding variation among \nindividual crabs.', metadata={})]
# Display the conversation history returned with the answer.
result1a["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The study investigates the relationship between body size and propus size in these crabs, as well as the feeding preference for different types of snails. The paper also compares the chela size (claw size) between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', additional_kwargs={}, example=False)]
# Second turn: ask the follow-up about significant findings (`query1a`).
# The original cell passed `query` again, which re-asked "What is this paper
# about?"; any recorded output produced before this fix reflects that repeat.
result1b = qa_chain_2({"question": query1a, "chat_history": chat_history})
# What were the significant findings on the relationship mentioned
result1b["answer"]
'The paper investigates the relationship between body size and propus size in Hemigrapsus nudus and Hemigrapsus oregonensis crabs. It also compares the chela size between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.'
# Display the retrieved documents returned with the answer.
result1b["source_documents"]
[Document(page_content='The paper investigates the relationship between body size and propus size in Hemigrapsus nudus and Hemigrapsus oregonensis crabs. It also compares the chela size between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', metadata={}),
Document(page_content='The paper investigates the relationship between body size and propus size in Hemigrapsus nudus and Hemigrapsus oregonensis crabs. It also compares the chela size between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', metadata={}),
Document(page_content='This paper is about the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. It investigates the relationship between body size and propus size in these crabs, as well as the feeding preference for different types of snails. The paper also compares the chela size (claw size) between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', metadata={})]
# Display the conversation history returned with the answer.
result1b["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The study investigates the relationship between body size and propus size in these crabs, as well as the feeding preference for different types of snails. The paper also compares the chela size (claw size) between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', additional_kwargs={}, example=False),
HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='The paper investigates the relationship between body size and propus size in Hemigrapsus nudus and Hemigrapsus oregonensis crabs. It also compares the chela size between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', additional_kwargs={}, example=False)]
# Ask the question held in `query2` and display the model's answer.
result2b = qa_chain_2({"question": query2, "chat_history": chat_history})
#"Summarize the paper concisely with reference to materials and methods."
result2b["answer"]
'The materials and methods used in this study involved measuring the carapace width, propal height, and propal width of female and male H. oregonensis and H. nudus. The relationship between carapace width and propal height/width was analyzed using SMA regression. Consumption rates of individual H. nudus were also recorded in 8 trials. Statistical analyses were conducted to determine the significance of sex, species, and their interaction on the carapace width to propal height/width ratios.'
# Display the retrieved documents returned with the answer.
result2b["source_documents"]
[Document(page_content='Mean Consumption Rate \n(Snails Consumed/Hour) \nIndividual H. nudus', metadata={}),
Document(page_content='Figure 1: Relationship of carapace width and propal height in Hemigrapsus. Line of best fit from \nSMA regression. See Table 1A for descriptive statistics. A) Female H. nudus. B) Male H. nudus. \nC) Female H. oregonensis. D) Male H. oregonensis. \n\nFigure 2: Relationship of carapace width and propal width in Hemigrapsus. \nLine of best fit from SMA regression. See Table 1B for descriptive statistics. A) Female H. \nnudus. B) Male H. nudus. C)Female H. oregonensis. D) Male H. oregonensis. \n\nFigure 3: Differences in propal height:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F = 125.6. p < 0.001), while the species factor was \ninsignificant (F > 0.01, p = 0.983). There was a significant interaction (F = 4.39, p = 0.042). \nError bars represent standard error of the mean. \n\nFigure 4: Differences in propal width:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F1,45 = 103.8. p < 0.001), while the species factor was \ninsignificant (F1,45 = 0.09, p = 0.764). There was a significant interaction (F1,45 = 7.19, p = \n0.01). Error bars represent standard error of the mean. \n\nFigure 5: Consumption rates by individual H. nudus. \nMean consumption rates (n = 8 trials) of 3 female (Fe1 to Fe3) and 3 male (Ma1 to Ma3) H. \nnudus. Crabs Fe1, Fe3, and Ma3 did not consume snails. The individuals that eat snails did not \ndiffer in their consumption rates (F2,21 = 2.52, p = 0.104). Error bars represent standard error of \nthe mean.', metadata={}),
Document(page_content='ace width was a good predictor of both propal height (Figure 1, Table 1A) and \npropal width (Figure 2, Table 1B) in female and male H. oregonensis and H. nudus. There was \nless variation in the relationship between carapace width and propal height than with propal \nwidth. Due to non-normality of CW:PH and CW:PW an arcsin transformation was used. A \nsignificant effect for sex was found for both CW:PH (F1, 45 = 125.6, p < 0.001) and CW:PW (F1, \n45 = 103.81, p < 0.001). There was no significant difference between Hemigrapsus species for \nCW:PH (F1, 45 < 0.01, p = 0.983) and CW:PW (F1, 45 = 0.09, p = 0.764). A significant \nsex*species interaction was detected for both CW:PH (F1, 45 4.39, p = 0.042) and CW:PW (F1, 45 \n= 7.19, p = 0.010). Figure 3 and 4 show the means of CW:PH and CW:PW in by sex and species \nrespectively.', metadata={})]
# Display the conversation history returned with the answer.
result2b["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The study investigates the relationship between body size and propus size in these crabs, as well as the feeding preference for different types of snails. The paper also compares the chela size (claw size) between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', additional_kwargs={}, example=False),
HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='The paper investigates the relationship between body size and propus size in Hemigrapsus nudus and Hemigrapsus oregonensis crabs. It also compares the chela size between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content='The materials and methods used in this study involved measuring the carapace width, propal height, and propal width of female and male H. oregonensis and H. nudus. The relationship between carapace width and propal height/width was analyzed using SMA regression. Consumption rates of individual H. nudus were also recorded in 8 trials. Statistical analyses were conducted to determine the significance of sex, species, and their interaction on the carapace width to propal height/width ratios.', additional_kwargs={}, example=False)]
# Ask the question held in `query3` and display the model's answer.
result3b = qa_chain_2({"question": query3, "chat_history": chat_history})
#"Write a one sentence summary of the purpose of the paper"
result3b["answer"]
'Based on the provided context, it is not possible to determine the purpose or main objective of the paper.'
# Display the retrieved documents returned with the answer.
result3b["source_documents"]
[Document(page_content='Table 1: SMA regressions of carapace width and propus measures. \nA) The relationship between carapace width and propal height. x is carapace width and y is \npropal height. B) The relationship between carapace width and propal height. x is carapace width \nand y is propal width.', metadata={})]
# Display the conversation history returned with the answer.
result3b["chat_history"]
[HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='This paper is about the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The study investigates the relationship between body size and propus size in these crabs, as well as the feeding preference for different types of snails. The paper also compares the chela size (claw size) between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', additional_kwargs={}, example=False),
HumanMessage(content='What is this paper about?', additional_kwargs={}, example=False),
AIMessage(content='The paper investigates the relationship between body size and propus size in Hemigrapsus nudus and Hemigrapsus oregonensis crabs. It also compares the chela size between the two species and explores the potential for character displacement. Additionally, the paper discusses the variation in snail consumption rates among individual crabs.', additional_kwargs={}, example=False),
HumanMessage(content='Summarize the paper concisely with reference to materials and methods.', additional_kwargs={}, example=False),
AIMessage(content='The materials and methods used in this study involved measuring the carapace width, propal height, and propal width of female and male H. oregonensis and H. nudus. The relationship between carapace width and propal height/width was analyzed using SMA regression. Consumption rates of individual H. nudus were also recorded in 8 trials. Statistical analyses were conducted to determine the significance of sex, species, and their interaction on the carapace width to propal height/width ratios.', additional_kwargs={}, example=False),
HumanMessage(content='Write a one sentence summary of the purpose of the paper', additional_kwargs={}, example=False),
AIMessage(content='Based on the provided context, it is not possible to determine the purpose or main objective of the paper.', additional_kwargs={}, example=False)]
# Ask the question held in `query4` and display the model's answer.
result4b = qa_chain_2({"question": query4, "chat_history": chat_history})
#"Terms that may be used to identify an observation include “in the field”,
#“this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”,
#“harvested”, “found”, etc. Does the paper include one or more observations?"
result4b["answer"]
'Yes, the paper includes multiple observations.'
# Display the retrieved documents returned with the answer.
result4b["source_documents"]
[Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={}),
Document(page_content='There were no detectible difference in chela size between H. nudus and H. oregonensis. Both species of crabs showed a strong feeding preference for the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata. There were no differences in snail consumption rates between male and female H. nudus of similar propal heights.', metadata={})]
# Ask the question held in `query5` and display the model's answer.
result5b = qa_chain_2({"question": query5, "chat_history": chat_history})
#"Does this paper contain observational or experimental research conducted in the natural environment
#or with organisms collected in nature?"
result5b["answer"]
'Based on the given context, it appears that the research described in the paper involves observational research conducted in the natural environment. The researchers collected snails from different locations in the natural habitat and observed the feeding preferences of the crabs in their natural environment.'
"source_documents"] result5b[
[Document(page_content='The relationship between propus size and use of snails as prey in sympatric populations \nof Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Both species of crabs showed a strong feeding preference \nfor the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata, likely \ndue to the ease of consumption. There were no differences in snail consumption rates between \nmale and female H. nudus of similar propal heights, mostly due to high feeding variation among \nindividual crabs.', metadata={}),
Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={})]
result6b = qa_chain_2({"question": query6, "chat_history": chat_history})
# "What are the scientific names of the collected species mentioned in this paper?"
result6b["answer"]
'The scientific names of the collected species mentioned in the paper are as follows:\n\n- Lacuna vincta\n- Littorina scutulata\n- Hemigrapsus nudus\n- Hemigrapsus oregonensis'
result6b["source_documents"]
[Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={}),
Document(page_content='Hemigrapsus nudus, Hemigrapsus oregonensis, Littorina scutulata, Lacuna vincta', metadata={})]
result7b = qa_chain_2({"question": query7, "chat_history": chat_history})
#"Does the paper mention where the species were observed or collected, and if so, what locations are given?"
result7b["answer"]
'Yes, the paper mentions the locations where the species were collected. Lacuna vincta was collected at False Bay and Dead Man Cove, while Littorina scutulata was collected from the rocky intertidal zone of Fourth of July Beach.'
result7b["source_documents"]
[Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={}),
Document(page_content='Both species of crabs showed a strong feeding preference for the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata, likely due to the ease of consumption.', metadata={})]
result7c = qa_chain_2({"question": query7a, "chat_history": chat_history})
# "Can you give a more specific location?"
result7c["answer"]
'- Lacuna vincta: collected at False Bay and Dead Man Cove.\n- Littorina scutulata: collected from the rocky intertidal zone of Fourth of July Beach.\n- Hemigrapsus nudus and Hemigrapsus oregonensis: The specific locations of collection are not mentioned in the given context.'
result8b = qa_chain_2({"question": query8, "chat_history": chat_history})
result8b["answer"]
'Yes, the una vincta gastropods were collected at False Bay and Dead Man Cove, while the Littorina scutulata gastropods were collected from the rocky intertidal zone of Fourth of July Beach.'
result8b["source_documents"]
[Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={})]
result9b = qa_chain_2({"question": query9, "chat_history": chat_history})
# "Are any coordinate locations given in latitude / longitude, and if so, what are they?"
result9b["answer"]
'Yes, there are coordinate locations given in latitude/longitude.'
result9b["source_documents"]
[]
result10b = qa_chain_2({"question": query10, "chat_history": chat_history})
# "In what habitat were the species found?"
result10b["answer"]
'The species were found in the rocky intertidal zone and adjacent intertidal areas of False Bay, Dead Man Cove, Fourth of July Beach, and the intertidal adjacent to the Friday Harbor Laboratories dock on San Juan Island, Washington.'
result10b["source_documents"]
[Document(page_content='There is still considerable habitat overlap between these two species; the underside of a single rock may have roughly equal abundances of the two crab species (personal observation).', metadata={}),
Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={}),
Document(page_content='All specimens were collected on San Juan Island, Washington. Two shore crabs (clade: \nBrachyura: Family Grapsidae), Hemigrapsus oregonensis and Hemigrapsus nudus were \ncollected in the intertidal adjacent to the Friday Harbor Laboratories dock.', metadata={})]
result10c = qa_chain_2({"question": query10a, "chat_history": chat_history})
# "Can you give a more specific location of the habitat such as formal name of said location?"
result10c["answer"]
'Yes, the specific locations mentioned in the context are False Bay, Dead Man Cove, and Fourth of July Beach, all of which are on San Juan Island, Washington. There is no mention of a formal name for these locations.'
result10c["source_documents"]
[Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={}),
Document(page_content='All specimens were collected on San Juan Island, Washington. Two shore crabs (clade: Brachyura: Family Grapsidae), Hemigrapsus oregonensis and Hemigrapsus nudus were collected in the intertidal adjacent to the Friday Harbor Laboratories dock.', metadata={})]
result11b = qa_chain_2({"question": query11, "chat_history": chat_history})
# "Does the paper mention a year, date and/or time that species were collected or observed, and if so, what was mentioned?"
result11b["answer"]
'Yes, the paper mentions the year as "Summer 2014". However, it does not provide specific dates or times for when the species were collected or observed.'
result11b['source_documents']
[Document(page_content='una vincta, in the Class Gastropoda and Family Littoridae, were collected at False Bay and \nDead Man Cove. Littorina scutulata, in the Class Gastropoda and Family Littoridae, were \ncollected from the rocky intertidal zone of Fourth of July Beach.', metadata={}),
Document(page_content='Summer 2014', metadata={})]
result12b = qa_chain_2({"question": query12, "chat_history": chat_history})
result12b["answer"]
'Yes, there are figures and tables in the paper. Specifically, there are Figure 1, Table 1A, Figure 2, Table 1B, Figure 3, and Figure 4.'
# "Are there any maps, figures, tables or diagrams in the paper?"
result12b["source_documents"]
[Document(page_content='Figure 1, Table 1A, Figure 2, Table 1B, Figure 3, Figure 4', metadata={}),
Document(page_content='Figure 1: Relationship of carapace width and propal height in Hemigrapsus. Line of best fit from \nSMA regression. See Table 1A for descriptive statistics. A) Female H. nudus. B) Male H. nudus. \nC) Female H. oregonensis. D) Male H. oregonensis. \n\nFigure 2: Relationship of carapace width and propal width in Hemigrapsus. \nLine of best fit from SMA regression. See Table 1B for descriptive statistics. A) Female H. \nnudus. B) Male H. nudus. C)Female H. oregonensis. D) Male H. oregonensis. \n\nFigure 3: Differences in propal height:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F = 125.6. p < 0.001), while the species factor was \ninsignificant (F > 0.01, p = 0.983). There was a significant interaction (F = 4.39, p = 0.042). \nError bars represent standard error of the mean. \n\nFigure 4: Differences in propal width:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F1,45 = 103.8. p < 0.001), while the species factor was \ninsignificant (F1,45 = 0.09, p = 0.764). There was a significant interaction (F1,45 = 7.19, p = \n0.01). Error bars represent standard error of the mean. \n\nFigure 5: Consumption rates by individual H. nudus. \nMean consumption rates (n = 8 trials) of 3 female (Fe1 to Fe3) and 3 male (Ma1 to Ma3) H. \nnudus. Crabs Fe1, Fe3, and Ma3 did not consume snails. The individuals that eat snails did not \ndiffer in their consumption rates (F2,21 = 2.52, p = 0.104). Error bars represent standard error of \nthe mean.', metadata={})]